Background

This document is to provide some plotting examples for reference.


Example #1: US State and County Maps

The package usmap contains maps of US states and counties. There is also some associated data available about state and county demographics.

Example code includes:

library(tidyverse)
## -- Attaching packages ------------------------------------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.2.1     v purrr   0.3.3
## v tibble  2.1.3     v dplyr   0.8.4
## v tidyr   1.0.2     v stringr 1.4.0
## v readr   1.3.1     v forcats 0.4.0
## -- Conflicts ---------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
# Population by county
data(countypop, package="usmap")
# Population by state
data(statepop, package="usmap")
# Poverty rate by county
data(countypov, package="usmap")
# Poverty rate by state
data(statepov, package="usmap")
# Population of largest city by state
data(citypop, package="usmap")
# Location of earthquakes
data(earthquakes, package="usmap")


# Included datasets
countypop
## # A tibble: 3,142 x 4
##    fips  abbr  county          pop_2015
##    <chr> <chr> <chr>              <dbl>
##  1 01001 AL    Autauga County     55347
##  2 01003 AL    Baldwin County    203709
##  3 01005 AL    Barbour County     26489
##  4 01007 AL    Bibb County        22583
##  5 01009 AL    Blount County      57673
##  6 01011 AL    Bullock County     10696
##  7 01013 AL    Butler County      20154
##  8 01015 AL    Calhoun County    115620
##  9 01017 AL    Chambers County    34123
## 10 01019 AL    Cherokee County    25859
## # ... with 3,132 more rows
statepop
## # A tibble: 51 x 4
##    fips  abbr  full                 pop_2015
##    <chr> <chr> <chr>                   <dbl>
##  1 01    AL    Alabama               4858979
##  2 02    AK    Alaska                 738432
##  3 04    AZ    Arizona               6828065
##  4 05    AR    Arkansas              2978204
##  5 06    CA    California           39144818
##  6 08    CO    Colorado              5456574
##  7 09    CT    Connecticut           3590886
##  8 10    DE    Delaware               945934
##  9 11    DC    District of Columbia   672228
## 10 12    FL    Florida              20271272
## # ... with 41 more rows
countypov
## # A tibble: 3,142 x 4
##    fips  abbr  county          pct_pov_2014
##    <chr> <chr> <chr>                  <dbl>
##  1 01001 AL    Autauga County          13.1
##  2 01003 AL    Baldwin County          13  
##  3 01005 AL    Barbour County          25.4
##  4 01007 AL    Bibb County             18.1
##  5 01009 AL    Blount County           17.5
##  6 01011 AL    Bullock County          35.1
##  7 01013 AL    Butler County           25  
##  8 01015 AL    Calhoun County          20.5
##  9 01017 AL    Chambers County         21.3
## 10 01019 AL    Cherokee County         18.6
## # ... with 3,132 more rows
statepov
## # A tibble: 51 x 4
##    fips  abbr  full                 pct_pov_2014
##    <chr> <chr> <chr>                       <dbl>
##  1 01    AL    Alabama                      19.2
##  2 02    AK    Alaska                       11.4
##  3 04    AZ    Arizona                      18.2
##  4 05    AR    Arkansas                     18.7
##  5 06    CA    California                   16.4
##  6 08    CO    Colorado                     12.1
##  7 09    CT    Connecticut                  10.8
##  8 10    DE    Delaware                     13  
##  9 11    DC    District of Columbia         18.4
## 10 12    FL    Florida                      16.6
## # ... with 41 more rows
citypop
## # A tibble: 51 x 6
##       lon   lat state                abbr  most_populous_city city_pop
##     <dbl> <dbl> <chr>                <chr> <chr>                 <dbl>
##  1  -86.8  33.6 Alabama              AL    Birmingham           212237
##  2 -150.   61.2 Alaska               AK    Anchorage            291826
##  3 -112.   33.4 Arizona              AZ    Phoenix             1445632
##  4  -92.3  34.7 Arkansas             AR    Little Rock          193524
##  5 -118.   34.0 California           CA    Los Angeles         3792621
##  6 -105.   39.8 Colorado             CO    Denver               600158
##  7  -73.2  41.2 Connecticut          CT    Bridgeport           144229
##  8  -75.6  39.8 Delaware             DE    Wilmington            70851
##  9  -77.0  38.9 District of Columbia DC    Washington           693972
## 10  -81.7  30.3 Florida              FL    Jacksonville         880619
## # ... with 41 more rows
earthquakes
## # A tibble: 2,254 x 3
##      lon   lat   mag
##    <dbl> <dbl> <dbl>
##  1 -118.  35.7  2.79
##  2 -118.  36.3  3.1 
##  3 -118.  36.2  2.74
##  4 -125.  40.4  2.77
##  5 -118.  35.7  2.59
##  6 -118.  35.7  2.82
##  7 -118.  35.7  2.78
##  8 -124.  40.3  3.42
##  9 -124.  41.7  2.5 
## 10 -119.  35.3  2.58
## # ... with 2,244 more rows
# Basic, empty US maps
usmap::plot_usmap(regions="states")

usmap::plot_usmap(regions="counties")

# Basic, empty US maps subsetted to an area
usmap::plot_usmap(regions="states", 
                  include=c("WA", "OR", "CA", "NV", "ID", "MT", "WY", "UT", "CO", "AZ", "NM")
                  )

usmap::plot_usmap(regions="counties", include=c("MN", "WI", "MI", "OH", "PA", "NY", "IN", "IL"))

# Basic, subsetted state map with poverty rates included
usmap::plot_usmap(regions="states", 
                  include=c("WA", "OR", "CA", "NV", "ID", "MT", "WY", "UT", "CO", "AZ", "NM"), 
                  values="pct_pov_2014", data=statepov, labels=TRUE
                  ) + 
    scale_fill_continuous(low="lightblue", high="darkblue", "Poverty Rate (%)") + 
    labs(title="Poverty Rates by Western and Mountain States")

# Basic, subsetted county map with poverty rates included
usmap::plot_usmap(regions="counties", include=c("MN", "WI", "MI", "OH", "PA", "NY", "IN", "IL"),
                  values="pct_pov_2014", data=countypov, labels=FALSE
                  ) + 
    scale_fill_continuous(low="lightblue", high="darkblue", "Poverty Rate (%)") + 
    labs(title="Poverty Rates by County in Great Lakes States")

Example #2: Converting and adding lat/lon data

The latitude and longitude data can be converted to a form suitable for usmap by using the usmap_transform function.

Example code includes:

# Transform the earthquakes data
trQuakes <- usmap::usmap_transform(earthquakes)
str(trQuakes)
## 'data.frame':    2254 obs. of  5 variables:
##  $ lon  : num  -118 -118 -118 -125 -118 ...
##  $ lat  : num  35.7 36.3 36.2 40.4 35.7 ...
##  $ mag  : num  2.79 3.1 2.74 2.77 2.59 2.82 2.78 3.42 2.5 2.58 ...
##  $ lon.1: num  -1575319 -1632293 -1595342 -2065325 -1574008 ...
##  $ lat.1: num  -872720 -785212 -811800 -197614 -867723 ...
# Add as a layer to the state map
usmap::plot_usmap(regions="states") + 
    geom_point(data=trQuakes, aes(x=lon.1, y=lat.1, size=mag), alpha=0.4) + 
    labs(title="Earthquakes of Magnitude 2.5+ (H1 2019)")

# Transform the largest city data
trCity <- usmap::usmap_transform(citypop)
str(trCity)
## 'data.frame':    51 obs. of  8 variables:
##  $ lon               : num  -86.8 -112.1 -92.3 -118.2 -104.9 ...
##  $ lat               : num  33.6 33.5 34.7 34 39.8 ...
##  $ state             : chr  "Alabama" "Arizona" "Arkansas" "California" ...
##  $ abbr              : chr  "AL" "AZ" "AR" "CA" ...
##  $ most_populous_city: chr  "Birmingham" "Phoenix" "Little Rock" "Los Angeles" ...
##  $ city_pop          : num  212237 1445632 193524 3792621 600158 ...
##  $ lon.1             : num  1220905 -1120927 702478 -1673156 -417275 ...
##  $ lat.1             : num  -1165156 -1202575 -1107534 -1034842 -570173 ...
# Add as a layer to the state map
usmap::plot_usmap(regions="states") + 
    geom_point(data=trCity, aes(x=lon.1, y=lat.1, size=city_pop)) + 
    labs(title="Largest City by State")

Example #3: Filtering and coloring by region

The census region definitions are included, and can be used to filter or color the maps.

Example code includes:

# Filter the map to include only new_england, mid_atlantic, and south_atlantic
usmap::plot_usmap(regions="states", 
                  include=c(usmap::.new_england, usmap::.mid_atlantic, usmap::.south_atlantic)
                  )

# Create regions data for US states
regionData <- usmap::statepop %>%
    mutate(region=as.factor(ifelse(abbr %in% usmap::.midwest_region, 1, 0)))
usmap::plot_usmap(regions="states", data=regionData, values="region") + 
    scale_fill_discrete("Midwest") + 
    labs(title="Midwest Region US States")

# Enhanced Coloring and Labelling
usmap::plot_usmap(regions="states", data=regionData, values="region") + 
    scale_fill_manual(values=c("lightgray", "lightblue"), "", labels=c("Other", "Midwest")) + 
    labs(title="Midwest Region US States")

Example #4: Labelling geographies

Since usmap is built on ggplot2, the standard techniques from ggplot2 can be used to enhance the geography labelling. Further, centroids for the geographies are available in loadable files.

Example code includes:

# Base state map labelled with defaults
usmap::plot_usmap(regions="states", labels=TRUE, label_color="red")

# Base county map labelled with defaults
usmap::plot_usmap(regions="counties", include=c("TX", "OK"), labels=TRUE, label_color="grey")

# Load state centroid data
stCenter <- utils::read.csv(system.file("extdata", 
                                        paste0("us_", "states", "_centroids.csv"), package = "usmap"
                                        ),
                            colClasses = c(rep("numeric", 2), rep("character", 3)), stringsAsFactors = FALSE
                            )

# Load county centroid data
ctCenter <- utils::read.csv(system.file("extdata", 
                                        paste0("us_", "counties", "_centroids.csv"), package = "usmap"
                                        ),
                            colClasses = c(rep("numeric", 2), rep("character", 4)), stringsAsFactors = FALSE
                            )

# Add state labels using geom_text
regionData <- usmap::statepop %>%
    mutate(region=as.factor(ifelse(abbr %in% usmap::.midwest_region, 1, 0))) %>%
    left_join(stCenter %>% select(x, y, full, fips) %>% rename(fname=full)) %>%
    mutate(fname=ifelse(fname=="District of Columbia", "DC", str_replace_all(fname, " ", "\n")))
## Joining, by = "fips"
usmap::plot_usmap(regions="states", data=regionData[, c("fips", "region")], values="region") + 
    scale_fill_manual(values=c("lightgray", "lightblue"), "", labels=c("Other", "Midwest")) + 
    labs(title="Midwest Region US States") + 
    geom_text(data=filter(regionData, region==1), aes(x=x, y=y, label=fname), size=2.5)

# Add county labels using geom_text
regionData <- usmap::countypop %>%
    mutate(region=as.factor(case_when(abbr=="OK" ~ 1, abbr=="TX" ~ 2, TRUE ~ 0))) %>%
    left_join(ctCenter %>% select(x, y, county, fips) %>% rename(cname=county)) %>%
    mutate(cname=str_replace_all(str_replace(cname, " County", ""), " ", "\n"))
## Joining, by = "fips"
usmap::plot_usmap(regions="counties", include=c("TX", "OK"), 
                  data=regionData[, c("fips", "region")], values="region") + 
    scale_fill_manual(values=c("red", "orange"), "", labels=c("Oklahoma", "Texas")) + 
    labs(title="Texas and Oklahoma Counties") + 
    geom_text(data=filter(regionData, abbr %in% c("TX", "OK")), 
              aes(x=x, y=y, label=cname), size=2.5, 
              color=ifelse(pull(filter(regionData, abbr %in% c("TX", "OK")), abbr)=="OK", "white", "black")
              )

Example #5: Adding population centers

Separate data exists for key population centers, which can be loaded and then added to maps.

Example code includes:

# Transform the largest city data
str(maps::us.cities)
## 'data.frame':    1005 obs. of  6 variables:
##  $ name       : chr  "Abilene TX" "Akron OH" "Alameda CA" "Albany GA" ...
##  $ country.etc: chr  "TX" "OH" "CA" "GA" ...
##  $ pop        : int  113888 206634 70069 75510 93576 45535 494962 44933 127159 88857 ...
##  $ lat        : num  32.5 41.1 37.8 31.6 42.7 ...
##  $ long       : num  -99.7 -81.5 -122.3 -84.2 -73.8 ...
##  $ capital    : int  0 0 0 0 2 0 0 0 0 0 ...
trCity <- usmap::usmap_transform(select(maps::us.cities, long, lat, everything())) %>% 
    mutate(useName=str_replace_all(str_sub(name, 1, -4), " ", "\n"))
str(trCity)
## 'data.frame':    1005 obs. of  9 variables:
##  $ long       : num  -99.7 -81.5 -122.3 -84.2 -73.8 ...
##  $ lat        : num  32.5 41.1 37.8 31.6 42.7 ...
##  $ name       : chr  "Abilene TX" "Akron OH" "Alameda CA" "Albany GA" ...
##  $ country.etc: chr  "TX" "OH" "CA" "GA" ...
##  $ pop        : int  113888 206634 70069 75510 93576 45535 494962 44933 127159 88857 ...
##  $ capital    : int  0 0 0 0 2 0 0 0 0 0 ...
##  $ long.1     : num  24544 1533716 -1931842 1498524 2096820 ...
##  $ lat.1      : num  -1392670 -262400 -543195 -1350290 82431 ...
##  $ useName    : chr  "Abilene" "Akron" "Alameda" "Albany" ...
# Define a key region for plotting
rgnPlot <- c(usmap::.west_south_central, usmap::.east_south_central)
popFilter <- 100000

# Add cities as a layer to the state map
usmap::plot_usmap(regions="states", include=rgnPlot, fill="lightblue") + 
    labs(title=paste0("South Central Cities with Population >= ", popFilter/1000, "k")) + 
    geom_point(data=filter(trCity, pop >= popFilter, country.etc %in% rgnPlot), 
               aes(x=long.1, y=lat.1, size=pop), alpha=0.5
               )

# Plot the full nation for cities of 250k +
rgnPlot <- c(usmap::.midwest_region, usmap::.northeast_region, 
             usmap::.south_region, usmap::.west_region
             )
popFilter <- 250000

# Add cities as a layer to the state map
usmap::plot_usmap(regions="states", include=rgnPlot, fill="lightblue") + 
    labs(title=paste0("US Cities with Population >= ", popFilter/1000, "k")) + 
    geom_point(data=filter(trCity, pop >= popFilter, country.etc %in% rgnPlot), 
               aes(x=long.1, y=lat.1, size=pop), alpha=0.5
               )

# Plot cities by name for the Four Corners region
rgnPlot <- c("UT", "CO", "NM", "AZ")
popFilter <- 125000

# Add cities as a layer to the state map (points)
usmap::plot_usmap(regions="counties", include=rgnPlot, fill="lightblue") + 
    labs(title=paste0("Four Corners Cities with Population >= ", popFilter/1000, "k")) + 
    geom_point(data=filter(trCity, pop >= popFilter, country.etc %in% rgnPlot), 
               aes(x=long.1, y=lat.1, size=pop), alpha=0.5
               )

# Add cities as a layer to the state map (text)
usmap::plot_usmap(regions="counties", include=rgnPlot, fill="lightblue") + 
    labs(title=paste0("Four Corners Cities with Population >= ", popFilter/1000, "k")) + 
    geom_text(data=filter(trCity, pop >= popFilter, country.etc %in% rgnPlot), 
               aes(x=long.1, y=lat.1, size=pop, label=useName)
               )

popFilter <- 50000
popFilter2 <- 250000

# Add cities as a layer to the state map (points and text)
usmap::plot_usmap(regions="counties", include=rgnPlot, fill="lightblue") + 
    labs(title=paste0("Four Corners Cities with Population >= ", popFilter/1000, "k")) + 
    geom_point(data=filter(trCity, pop >= popFilter, country.etc %in% rgnPlot), 
               aes(x=long.1, y=lat.1, size=pop), alpha=0.5
               ) + 
    geom_text(data=filter(trCity, pop >= popFilter2, country.etc %in% rgnPlot), 
               aes(x=long.1, y=lat.1, size=pop, label=useName), color="red", show.legend=FALSE
               )

Example #6: Custom coloring geographies

Using scale_fill_manual(), custom colors can be created by geography.

Example code includes:

# Basic county population map with continuous colors
usmap::countypop %>% 
    filter(abbr %in% c("OH", "IN", "KY")) %>% 
    mutate(pop=pop_2015/1000, name=str_replace(str_replace(county, " County", ""), " ", "\n")) %>%
    usmap::plot_usmap(regions="counties", include=c("OH", "IN", "KY"), data=., values="pop") +
    scale_fill_continuous(low="lightblue", high="darkblue", "Pop. (000)") + 
    labs(title="Indiana, Ohio, and Kentucky - Population by County")

# Custom county population map with colors - red for Indiana, blue for Kentucky, grey for Ohio
popBucket <- c(0, 100, 500)
popLabels <- sapply(1:(length(popBucket)-1), FUN=function(x){paste0(popBucket[x], "-", popBucket[x+1])})
popLabels <- c(popLabels, paste0(popBucket[length(popBucket)], "+"))
guideLabels <- paste(rep(c("OH", "KY", "IN"), each=3), popLabels)

usmap::countypop %>% 
    filter(abbr %in% c("OH", "KY", "IN")) %>% 
    mutate(pop=pop_2015/1000, name=str_replace(str_replace(county, " County", ""), " ", "\n"), 
           pBucket=findInterval(pop, popBucket), 
           pColor=rgb(abbr=="IN", 0, abbr=="KY", pBucket/length(popBucket))
           ) %>%
    usmap::plot_usmap(regions="counties", include=c("OH", "IN", "KY"), data=., values="pColor") +
    scale_fill_identity(guide="legend", "Pop. (000)", labels=guideLabels) + 
    labs(title="Indiana, Ohio, and Kentucky - Population by County") + 
    theme(legend.position = "bottom") + 
    guides(fill=guide_legend(nrow=3))

# Custom county poverty rate map with colors - red for Indiana, blue for Kentucky, grey for Ohio
povBucket <- c(0, 15, 30)
povLabels <- sapply(1:(length(povBucket)-1), FUN=function(x){paste0(povBucket[x], "-", povBucket[x+1])})
povLabels <- c(povLabels, paste0(povBucket[length(povBucket)], "+"))
guideLabels <- paste(rep(c("OH", "KY", "IN"), each=3), povLabels)

usmap::countypov %>% 
    filter(abbr %in% c("OH", "KY", "IN")) %>% 
    mutate(name=str_replace(str_replace(county, " County", ""), " ", "\n"), 
           pBucket=findInterval(pct_pov_2014, povBucket), 
           pColor=rgb(abbr=="IN", 0, abbr=="KY", pBucket/length(povBucket))
           ) %>%
    usmap::plot_usmap(regions="counties", include=c("OH", "IN", "KY"), data=., values="pColor") +
    scale_fill_identity(guide="legend", "Poverty Rate (%)", labels=guideLabels) + 
    labs(title="Indiana, Ohio, and Kentucky - Poverty Rate by County") + 
    theme(legend.position = "bottom") + 
    guides(fill=guide_legend(nrow=3))

Example #7: Custom labeling of key geographies

The above techniques can be combined for custom labeling of key geographies.

Example code includes:

# Basic state population data
stateData <- usmap::statepop %>% 
    mutate(pop=round(pop_2015/1000000, 1), 
           name=ifelse(full=="District of Columbia", "DC", str_replace(full, " ", "\n")), 
           lab=paste0(abbr, "\n(", pop, ")\n")
           )

# Load state centroid data
stCenter <- utils::read.csv(system.file("extdata", 
                                        paste0("us_", "states", "_centroids.csv"), package = "usmap"
                                        ),
                            colClasses = c(rep("numeric", 2), rep("character", 3)), stringsAsFactors = FALSE
                            )

# Grab centroids for top 5 states
top5State <- stateData %>%
    top_n(5, pop) %>% 
    left_join(select(stCenter, x, y, fips))
## Joining, by = "fips"
# Plot state population with continuous colors and custom labels
stateData %>% 
    usmap::plot_usmap(regions="states", data=., values="pop") +
    scale_fill_continuous(low="lightblue", high="darkblue", "Pop. (millions)") + 
    labs(title="Population by State", subtitle="Top 5 in millions") + 
    geom_text(data=top5State, aes(x=x, y=y, label=lab), color="white", size=4, fontface="bold")

# Load county centroid data
ctCenter <- utils::read.csv(system.file("extdata", 
                                        paste0("us_", "counties", "_centroids.csv"), package = "usmap"
                                        ),
                            colClasses = c(rep("numeric", 2), rep("character", 4)), stringsAsFactors = FALSE
                            )

# Custom county population map with colors - red for Wisconsin, blue for Michigan
popBucket <- c(0, 100, 500)
popLabels <- sapply(1:(length(popBucket)-1), FUN=function(x){paste0(popBucket[x], "-", popBucket[x+1])})
popLabels <- c(popLabels, paste0(popBucket[length(popBucket)], "+"))
guideLabels <- paste(rep(c("MI", "WI"), each=3), popLabels)

# Grab county data for counties exceeding the top popBucket
ctyData <- usmap::countypop %>%
    filter(abbr %in% c("MI", "WI")) %>%
    mutate(pop=round(pop_2015/1000, 0), 
           name=str_replace(str_replace(county, " County", ""), " ", "\n"), 
           lab=paste0(name, "\n(", pop, ")\n")
           )

topCounty <- ctyData %>%
    filter(pop >= max(popBucket)) %>%
    left_join(select(ctCenter, x, y, fips))
## Joining, by = "fips"
# Create county population map
usmap::countypop %>% 
    filter(abbr %in% c("MI", "WI")) %>% 
    mutate(pop=pop_2015/1000, name=str_replace(str_replace(county, " County", ""), " ", "\n"), 
           pBucket=findInterval(pop, popBucket), 
           pColor=rgb(abbr=="WI", 0, abbr=="MI", pBucket/length(popBucket))
           ) %>%
    usmap::plot_usmap(regions="counties", include=c("WI", "MI"), data=., values="pColor") +
    scale_fill_identity(guide="legend", "Pop. (000)", labels=guideLabels) + 
    geom_text(data=topCounty, aes(x=x, y=y, label=lab), size=3, fontface="bold", color="white") +
    labs(title="Michigan and Wisconsin - Population by County", subtitle="Labelled Pop. (000) for 500k+") + 
    theme(legend.position = "bottom") + 
    guides(fill=guide_legend(nrow=3)) + 
    theme(panel.background=element_rect(color="black", fill="lightgrey"))

Example #8: Plotting Weather Data (Temperatures and Dew Points)

The ggridges package has weather data for Lincoln, NE in the data file ‘lincoln_weather’. The data are captured once per day for 366 days of 2016. Simple plots can be made of the average temperatures and dew points.

Example code includes:

data(lincoln_weather, package="ggridges")
str(lincoln_weather, give.attr=FALSE)
## Classes 'tbl_df', 'tbl' and 'data.frame':    366 obs. of  24 variables:
##  $ CST                         : chr  "2016-1-1" "2016-1-2" "2016-1-3" "2016-1-4" ...
##  $ Max Temperature [F]         : int  37 41 37 30 38 34 33 28 22 31 ...
##  $ Mean Temperature [F]        : int  24 23 23 17 29 33 30 25 9 11 ...
##  $ Min Temperature [F]         : int  11 5 8 4 19 32 27 22 -4 -9 ...
##  $ Max Dew Point [F]           : int  19 22 23 24 29 33 32 25 17 20 ...
##  $ Mean Dew Point [F]          : int  13 14 15 13 25 32 30 22 4 5 ...
##  $ Min Dewpoint [F]            : int  8 4 8 2 19 29 25 18 -8 -13 ...
##  $ Max Humidity                : int  88 100 92 92 96 100 100 92 87 87 ...
##  $ Mean Humidity               : int  68 72 73 82 83 91 96 85 77 75 ...
##  $ Min Humidity                : int  47 44 54 72 70 82 92 78 67 63 ...
##  $ Max Sea Level Pressure [In] : num  30.5 30.4 30.5 30.5 30.2 ...
##  $ Mean Sea Level Pressure [In]: num  30.4 30.3 30.4 30.4 30.1 ...
##  $ Min Sea Level Pressure [In] : num  30.3 30.2 30.3 30.2 30 ...
##  $ Max Visibility [Miles]      : int  10 10 10 10 10 10 9 10 10 10 ...
##  $ Mean Visibility [Miles]     : int  10 10 10 9 8 4 3 6 9 10 ...
##  $ Min Visibility [Miles]      : int  10 10 10 6 5 0 0 2 5 10 ...
##  $ Max Wind Speed [MPH]        : int  20 15 13 17 22 16 16 25 25 10 ...
##  $ Mean Wind Speed[MPH]        : int  9 6 5 7 13 7 7 16 14 5 ...
##  $ Max Gust Speed [MPH]        : int  23 18 14 23 28 21 21 32 28 12 ...
##  $ Precipitation [In]          : chr  "0" "0" "0" "0" ...
##  $ CloudCover                  : int  0 0 0 1 4 8 8 8 5 0 ...
##  $ Events                      : chr  NA NA NA NA ...
##  $ WindDir [Degrees]           : int  280 312 330 155 178 167 7 338 340 268 ...
##  $ Month                       : Factor w/ 12 levels "December","November",..: 12 12 12 12 12 12 12 12 12 12 ...
# Extract temperature and dew point data
tdData <- lincoln_weather %>%
    select(CST, maxT=`Max Temperature [F]`, minT=`Min Temperature [F]`, meanT=`Mean Temperature [F]`, 
           maxD=`Max Dew Point [F]`, minD=`Min Dewpoint [F]`, meanD=`Mean Dew Point [F]`
           ) %>%
    mutate(date=as.Date(CST))
str(tdData)
## Classes 'tbl_df', 'tbl' and 'data.frame':    366 obs. of  8 variables:
##  $ CST  : chr  "2016-1-1" "2016-1-2" "2016-1-3" "2016-1-4" ...
##  $ maxT : int  37 41 37 30 38 34 33 28 22 31 ...
##  $ minT : int  11 5 8 4 19 32 27 22 -4 -9 ...
##  $ meanT: int  24 23 23 17 29 33 30 25 9 11 ...
##  $ maxD : int  19 22 23 24 29 33 32 25 17 20 ...
##  $ minD : int  8 4 8 2 19 29 25 18 -8 -13 ...
##  $ meanD: int  13 14 15 13 25 32 30 22 4 5 ...
##  $ date : Date, format: "2016-01-01" "2016-01-02" ...
# Plot temperatures by day
tdData %>%
    select(date, maxT, meanT, minT) %>%
    pivot_longer(-date) %>%
    ggplot(aes(x=date, y=value, group=name)) + 
    geom_line(aes(color=name))

# Plot dew points by day
tdData %>%
    select(date, maxD, meanD, minD) %>%
    pivot_longer(-date) %>%
    ggplot(aes(x=date, y=value, group=name)) + 
    geom_line(aes(color=name))

library(xts)
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## 
## Attaching package: 'xts'
## The following objects are masked from 'package:dplyr':
## 
##     first, last
# Create an XTS for temperature data
tdXTS <- xts(select(tdData, minT, meanT, maxT), order.by=tdData$date)

# Create and plot weekly and monthly averages
tdXTS %>%
    apply.weekly(FUN=mean, na.rm=TRUE) %>%
    plot(main="Weekly Temperature Average (Lincoln, NE 2016)")

tdXTS %>%
    apply.monthly(FUN=mean, na.rm=TRUE) %>%
    plot(main="Monthly Temperature Average (Lincoln, NE 2016)")

# Create an XTS for dew-point data
tdXTS <- xts(select(tdData, minD, meanD, maxD), order.by=tdData$date)

# Create and plot weekly and monthly averages
tdXTS %>%
    apply.weekly(FUN=mean, na.rm=TRUE) %>%
    plot(main="Weekly Dew Point Average (Lincoln, NE 2016)")

tdXTS %>%
    apply.monthly(FUN=mean, na.rm=TRUE) %>%
    plot(main="Monthly Dew Point Average (Lincoln, NE 2016)")

Example #9: Combining xts and ggplot2

The xts package is good for working with time series data, while ggplot2 is strong for customizing plots. The packages can be combined in using the weather data.

Example code includes:

# Create an XTS for temperature and dewpoint data
tdXTS <- xts(select(tdData, minT, meanT, maxT, minD, meanD, maxD), order.by=tdData$date)

# Use xts for monthly average and ggplot2 for plotting
basePlot <- tdXTS %>%
    apply.monthly(FUN=mean, na.rm=TRUE) %>% 
    data.frame(date=index(.), row.names=NULL) %>% 
    ggplot(aes(x=date-lubridate::days(15))) + 
    geom_ribbon(aes(ymin=minT, ymax=maxT), color="lightblue", fill="lightblue", alpha=0.5) +
    geom_line(aes(y=meanT), color="blue", lwd=1) + 
    labs(x="Month", y="Avg. Temperature (F)", title="Lincoln, NE Weather (2016)", 
         subtitle="Monthly Avg. Temperature (F)"
         )
basePlot

# Add labelling for the three elements
hiMonth <- index(tdXTS %>% apply.monthly(FUN=mean))[3]
loMonth <- index(tdXTS %>% apply.monthly(FUN=mean))[9]
muMonth <- index(tdXTS %>% apply.monthly(FUN=mean))[6]
hiPoint <- c(60, 75)
loPoint <- c(45, 25)
muPoint <- c(72.5, 45)

labFrame <- data.frame(x=c(hiMonth, loMonth, muMonth), 
                       yend=c(hiPoint[1], loPoint[1], muPoint[1]),
                       y=c(hiPoint[2], loPoint[2], muPoint[2]), 
                       text=c("Avg. Monthly High", "Avg. Monthly Low", "Avg. Monthly Mean")) %>%
    mutate(xend=x)

basePlot + 
    geom_segment(data=labFrame, aes(x=x, y=y, xend=xend, yend=yend), arrow=arrow()) + 
    geom_text(data=labFrame, aes(x=x, y=y+c(5, -5, -5), label=text), fontface="bold", size=4)

# Can also create and plot a custom rolling average
baseData <- tdXTS %>%
    data.frame(date=index(.), row.names=NULL)

base7Day <- rollapply(tdXTS, 7, FUN=mean, na.rm=TRUE) %>% 
    data.frame(date=index(.), row.names=NULL)

base30Day <- rollapply(tdXTS, 30, FUN=mean, na.rm=TRUE) %>% 
    data.frame(date=index(.), row.names=NULL)

plotFrame <- bind_rows(baseData, base7Day, base30Day, .id="rolling") %>%
    mutate(rollLabel=case_when(rolling==1 ~ "Daily", 
                               rolling==2 ~ "7 Day Rolling", 
                               rolling==3 ~ "30 Day Rolling",
                               TRUE ~ "ERROR"
                               )
           )
            
plotFrame %>%
    ggplot(aes(x=date)) + 
    geom_line(aes(y=meanT, color=rollLabel, group=rollLabel), lwd=1) + 
    labs(x="Month", y="Avg. Temperature (F)", title="Lincoln, NE Weather (2016)", 
         subtitle="Daily Avg. Temperature (F)"
         )
## Warning: Removed 35 rows containing missing values (geom_path).

Example #10: Plotting Weather Data (Humidity)

Humidity data are also available in the lincoln_weather dataset. There is a relationship between temperature, dewpoint, and humidity.

Example code includes:

htdData <- lincoln_weather %>%
    select(CST, meanT=`Mean Temperature [F]`, meanD=`Mean Dew Point [F]`, meanH=`Mean Humidity`) %>%
    mutate(date=as.Date(CST), month=lubridate::month(date))
str(htdData)
## Classes 'tbl_df', 'tbl' and 'data.frame':    366 obs. of  6 variables:
##  $ CST  : chr  "2016-1-1" "2016-1-2" "2016-1-3" "2016-1-4" ...
##  $ meanT: int  24 23 23 17 29 33 30 25 9 11 ...
##  $ meanD: int  13 14 15 13 25 32 30 22 4 5 ...
##  $ meanH: int  68 72 73 82 83 91 96 85 77 75 ...
##  $ date : Date, format: "2016-01-01" "2016-01-02" ...
##  $ month: num  1 1 1 1 1 1 1 1 1 1 ...
# Histogram for average humidity
htdData %>%
    ggplot(aes(x=meanH)) + 
    geom_histogram() + 
    labs(title="Mean Humidity Histogram", subtitle="Lincoln, NE (2016)", x="Mean Humidity (%)", y="Count")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 1 rows containing non-finite values (stat_bin).

htdData %>%
    filter((meanH < 25) | is.na(meanH))
## # A tibble: 2 x 6
##   CST       meanT meanD meanH date       month
##   <chr>     <int> <int> <int> <date>     <dbl>
## 1 2016-2-27    50    21     0 2016-02-27     2
## 2 2016-2-28    44    NA    NA 2016-02-28     2
htdData <- htdData %>%
    filter(!((meanH < 25) | is.na(meanH)))

# Updated Histogram for average humidity
htdData %>%
    ggplot(aes(x=meanH)) + 
    geom_histogram() + 
    labs(title="Mean Humidity Histogram", subtitle="Lincoln, NE (2016)", x="Mean Humidity (%)", y="Count")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Histogram for dewpoint depression (T - D)
htdData %>%
    ggplot(aes(x=meanT-meanD)) + 
    geom_histogram() + 
    labs(title="Mean Dewpoint Depression Histogram", subtitle="Lincoln, NE (2016)", 
         x="Mean Dewpoint Depression (F)", y="Count")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

htdData %>% 
    filter(meanD >= meanT)
## # A tibble: 2 x 6
##   CST        meanT meanD meanH date       month
##   <chr>      <int> <int> <int> <date>     <dbl>
## 1 2016-1-7      30    30    96 2016-01-07     1
## 2 2016-11-27    37    39    85 2016-11-27    11
htdData <- htdData %>%
    filter(meanT >= meanD)

# Updated Histogram for dewpoint depression (T - D)
htdData %>%
    ggplot(aes(x=meanT-meanD, y=..density..)) + 
    geom_histogram(binwidth=1) + 
    geom_density(color="red") + 
    labs(title="Mean Dewpoint Depression Histogram", subtitle="Lincoln, NE (2016)", 
         x="Mean Dewpoint Depression (F)", y="Proportion")

# Average humidity by month
htdData %>%
    group_by(month) %>%
    summarize(meanH=mean(meanH, na.rm=TRUE)) %>%
    ggplot(aes(x=as.factor(month), y=meanH)) + 
    geom_col() + 
    labs(title="Average Humidity by Month", subtitle="Lincoln, NE (2016)", x="Month", y="Mean Humidity (%)")

# Relationship between temperature and dewpoint
htdData %>%
    ggplot(aes(x=meanD, y=meanT)) + 
    geom_point() + 
    geom_abline(slope=1, intercept=0) + 
    labs(title="Daily Averages", subtitle="Lincoln, NE (2016)", 
         x="Mean Dewpoint (F)", y="Mean Temperature (F)"
         )

# Relationship between dewpoint depression and humidity
htdData %>%
    mutate(dpD=meanT-meanD) %>%
    ggplot(aes(x=dpD, y=meanH)) + 
    geom_point() + 
    labs(title="Daily Averages", subtitle="Lincoln, NE (2016)", 
         x="Mean Dewpoint Depression (F)", y="Mean Humidity (%)"
         )

# Relationship between temperature and dewpoint and humidity
humInts <- c(0, 50, 60, 70, 80)
humLabel <- sapply(1:(length(humInts)-1), FUN=function(x) { paste0(humInts[x], "-", humInts[x+1]) })
humLabel <- c(humLabel, paste0(humInts[length(humInts)], "+"))

htdData %>%
    mutate(humBin=factor(findInterval(meanH, humInts), levels=1:length(humInts), labels=humLabel)) %>%
    ggplot(aes(x=meanD, y=meanT, color=humBin)) + 
    geom_point() + 
    geom_smooth(se=FALSE) +
    geom_abline(slope=1, intercept=0) + 
    labs(title="Daily Averages", subtitle="Lincoln, NE (2016)", 
         x="Mean Dewpoint (F)", y="Mean Temperature (F)"
         )
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Expressed using dewpoint depression vs. dewpoint
htdData %>%
    mutate(humBin=factor(findInterval(meanH, humInts), levels=1:length(humInts), labels=humLabel)) %>%
    ggplot(aes(x=meanD, y=meanT-meanD, color=humBin)) + 
    geom_point() + 
    geom_smooth() +
    labs(title="Daily Averages", subtitle="Lincoln, NE (2016)", 
         x="Mean Dewpoint (F)", y="Dewpoint Depression (F)"
         )
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Expressed using dewpoint depression vs. temperature
htdData %>%
    mutate(humBin=factor(findInterval(meanH, humInts), levels=1:length(humInts), labels=humLabel)) %>%
    ggplot(aes(x=meanT, y=meanT-meanD, color=humBin)) + 
    geom_point() + 
    geom_smooth(se=FALSE, method="lm") +
    labs(title="Daily Averages", subtitle="Lincoln, NE (2016)", 
         x="Mean Temperature (F)", y="Dewpoint Depression (F)"
         )

# Linear regression for temperature, dewpoint, and humidity
htdReg <- htdData %>%
    mutate(dpD=meanT-meanD) %>%
    lm(meanH ~ meanT + dpD, data=.)
summary(htdReg)
## 
## Call:
## lm(formula = meanH ~ meanT + dpD, data = .)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -23.3569  -2.5472  -0.1894   2.7285  14.4934 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 82.89890    0.83037  99.833  < 2e-16 ***
## meanT        0.09771    0.01403   6.965 1.57e-11 ***
## dpD         -1.79530    0.04811 -37.317  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.869 on 360 degrees of freedom
## Multiple R-squared:  0.796,  Adjusted R-squared:  0.7949 
## F-statistic: 702.4 on 2 and 360 DF,  p-value: < 2.2e-16
htdData %>%
    mutate(dpD=meanT-meanD) %>%
    mutate(predH=predict(htdReg, newdata=.)) %>%
    ggplot(aes(x=predH, y=meanH)) + 
    geom_point() + 
    geom_abline(slope=1, intercept=0) + 
    labs(title="Daily Averages", subtitle="Lincoln, NE (2016)", 
         x="Predicted Humidity (%)", y="Actual Humidity (%)"
         )

Example #11: Plotting Weather Data (Wind)

Wind data (speed, gust, direction) are also available in the lincoln_weather dataset..

Example code includes:

# Extract wind data
wdData <- lincoln_weather %>%
    select(CST, maxW=`Max Wind Speed [MPH]`, maxG=`Max Gust Speed [MPH]`, meanW=`Mean Wind Speed[MPH]`, 
           dirW=`WindDir [Degrees]`
           ) %>%
    mutate(date=as.Date(CST))
str(wdData)
## Classes 'tbl_df', 'tbl' and 'data.frame':    366 obs. of  6 variables:
##  $ CST  : chr  "2016-1-1" "2016-1-2" "2016-1-3" "2016-1-4" ...
##  $ maxW : int  20 15 13 17 22 16 16 25 25 10 ...
##  $ maxG : int  23 18 14 23 28 21 21 32 28 12 ...
##  $ meanW: int  9 6 5 7 13 7 7 16 14 5 ...
##  $ dirW : int  280 312 330 155 178 167 7 338 340 268 ...
##  $ date : Date, format: "2016-01-01" "2016-01-02" ...
# Manage missing data
wdData[!complete.cases(wdData), ]
## # A tibble: 6 x 6
##   CST         maxW  maxG meanW  dirW date      
##   <chr>      <int> <int> <int> <int> <date>    
## 1 2016-2-6      18    NA     5   241 2016-02-06
## 2 2016-2-28     36    45    18    NA 2016-02-28
## 3 2016-5-31     17    NA    11   319 2016-05-31
## 4 2016-6-18     NA    NA    NA    -1 2016-06-18
## 5 2016-6-19     NA    NA    NA    -1 2016-06-19
## 6 2016-12-19    17    NA     9   209 2016-12-19
wdData <- wdData %>%
    filter(dirW != -1, !is.na(dirW)) %>%
    mutate(maxG=ifelse(is.na(maxG), maxW, maxG))
summary(wdData)
##      CST                 maxW             maxG            meanW      
##  Length:363         Min.   :  8.00   Min.   : 10.00   Min.   : 0.00  
##  Class :character   1st Qu.: 16.00   1st Qu.: 21.00   1st Qu.: 6.00  
##  Mode  :character   Median : 21.00   Median : 26.00   Median : 8.00  
##                     Mean   : 21.71   Mean   : 27.93   Mean   : 9.11  
##                     3rd Qu.: 25.00   3rd Qu.: 33.00   3rd Qu.:11.00  
##                     Max.   :131.00   Max.   :143.00   Max.   :27.00  
##       dirW            date           
##  Min.   :  1.0   Min.   :2016-01-01  
##  1st Qu.:143.5   1st Qu.:2016-04-01  
##  Median :193.0   Median :2016-07-03  
##  Mean   :209.8   Mean   :2016-07-01  
##  3rd Qu.:310.0   3rd Qu.:2016-10-01  
##  Max.   :358.0   Max.   :2016-12-31
# Manage very high wind data
wdData[wdData$maxG >= 60, ]
## # A tibble: 3 x 6
##   CST         maxW  maxG meanW  dirW date      
##   <chr>      <int> <int> <int> <int> <date>    
## 1 2016-3-23     45    61    25    20 2016-03-23
## 2 2016-6-17    131   143    20   170 2016-06-17
## 3 2016-12-25    51    64    19   139 2016-12-25
wdData <- wdData %>%
    filter(maxG <= 80)
summary(wdData)
##      CST                 maxW           maxG           meanW      
##  Length:362         Min.   : 8.0   Min.   :10.00   Min.   : 0.00  
##  Class :character   1st Qu.:16.0   1st Qu.:21.00   1st Qu.: 6.00  
##  Mode  :character   Median :21.0   Median :26.00   Median : 8.00  
##                     Mean   :21.4   Mean   :27.61   Mean   : 9.08  
##                     3rd Qu.:25.0   3rd Qu.:33.00   3rd Qu.:11.00  
##                     Max.   :51.0   Max.   :64.00   Max.   :27.00  
##       dirW            date           
##  Min.   :  1.0   Min.   :2016-01-01  
##  1st Qu.:143.2   1st Qu.:2016-04-01  
##  Median :193.0   Median :2016-07-03  
##  Mean   :209.9   Mean   :2016-07-01  
##  3rd Qu.:310.0   3rd Qu.:2016-10-01  
##  Max.   :358.0   Max.   :2016-12-31
# Density of wind speeds
wdData %>%
    select(date, meanW, maxW, maxG) %>%
    pivot_longer(-date) %>%
    ggplot(aes(x=value, fill=name)) + 
    geom_density(alpha=0.5) + 
    scale_fill_discrete(name="Wind Speed [MPH]", labels=c("Max Gust", "Max", "Mean")) + 
    labs(title="Lincoln, NE (2016) Wind Speeds", y="Density", x="Wind Speed [MPH]")

# Density of wind direction
wdData %>%
    select(date, dirW) %>%
    ggplot(aes(x=dirW)) + 
    geom_density(alpha=0.5, fill="blue") + 
    labs(title="Winds are mainly from the S and NW", subtitle="Lincoln, NE (2016)", 
         y="Density", x="Wind Direction"
         )

# Wind speed and direction
wdData %>% 
    ggplot(aes(x=meanW, y=dirW)) + 
    geom_point(alpha=0.25) + 
    coord_polar(theta="y") + 
    labs(title="Lincoln, NE (2016)", subtitle="Direction vs. Mean Wind Speed", x="Mean Wind Speed [MPH]") + 
    scale_y_continuous(limits=c(0, 360), breaks=c(0, 90, 180, 270, 360)) + 
    scale_x_continuous(limits=c(0, 30), breaks=c(0, 5, 10, 15, 20, 25, 30)) + 
    geom_point(aes(x=0, y=0), color="red", size=2)

# Wind speed and direction as factors
windDirs <- c("N", "NE", "E", "SE", "S", "SW", "W", "NW")
windSpeeds <- c(0, 5, 10, 15)
windLabels <- sapply(1:(length(windSpeeds)-1), FUN=function(x){ paste0(windSpeeds[x], "-", windSpeeds[x+1]) })
windLabels <- c(windLabels, paste0(windSpeeds[length(windSpeeds)], "+"))
wdData <- wdData %>% 
    mutate(wd=factor(floor(((dirW+22.5)/45) %% 8), levels=0:7, labels=windDirs), 
           ws=factor(findInterval(meanW, windSpeeds), levels=length(windSpeeds):1, labels=rev(windLabels))
           )

# Summary of interaction between wind speed and wind direction 
wdData %>%
    group_by(wd) %>% 
    summarize(n=n(), avgMean=mean(meanW), avgMax=mean(maxW), avgGust=mean(maxG))
## # A tibble: 8 x 5
##   wd        n avgMean avgMax avgGust
##   <fct> <int>   <dbl>  <dbl>   <dbl>
## 1 N        51   10.1    21.4    27.7
## 2 NE       18    8.17   19.4    24.8
## 3 E        16    7.44   18.9    24.6
## 4 SE       69    8.01   19.6    25.5
## 5 S        73    8.89   20.6    26.8
## 6 SW       26    6.96   20.3    26.2
## 7 W        29    8.10   20.4    26.5
## 8 NW       80   11.1    25.4    32.2
table(wdData$wd, wdData$ws)
##     
##      15+ 10-15 5-10 0-5
##   N    8    14   25   4
##   NE   1     7    6   4
##   E    0     3   12   1
##   SE   3    15   42   9
##   S    5    23   43   2
##   SW   0     6   15   5
##   W    3     4   18   4
##   NW  17    31   28   4
# Graph of wind speed and wind direction
wdData %>%
    ggplot(aes(x=wd, fill=ws)) + 
    geom_bar() + 
    scale_fill_discrete(name="Wind Speed [MPH]") + 
    labs(title="Lincoln, NE (2016) Wind Speeds and Directions", y="# Days", x="Wind Direction")

# Graph of wind speed and wind direction (polar coordinates)
wdData %>%
    ggplot(aes(x=wd, fill=ws)) + 
    geom_bar() + 
    scale_fill_discrete(name="Wind Speed [MPH]") + 
    labs(title="Lincoln, NE (2016) Wind Speeds and Directions", y="# Days", x="Wind Direction") + 
    coord_polar(start=-0.4)

Example #12: Archived granular weather Data (METAR)

Iowa State has a great database of archived weather data, including the historical METAR data (meteorological aerodrome report) for a number of reporting stations.

METAR include information on visibility, wind, temperature, dew point, precipitation, clouds, barometric pressure, and other features that may impact safe aviation.

The data for station KLNK (Lincoln, NE airport) was saved as a CSV from Iowa State

Some processing is required before using the METAR data:

Example code includes:

# Load METAR data
klnk <- readr::read_csv("./RInputFiles/metar_klnk_2016.txt", na=c("", "NA", "M"))
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   station = col_character(),
##   valid = col_datetime(format = ""),
##   p01i = col_character(),
##   skyc1 = col_character(),
##   skyc2 = col_character(),
##   skyc3 = col_character(),
##   skyc4 = col_logical(),
##   skyl4 = col_logical(),
##   wxcodes = col_character(),
##   ice_accretion_1hr = col_character(),
##   ice_accretion_3hr = col_character(),
##   ice_accretion_6hr = col_character(),
##   peak_wind_time = col_datetime(format = ""),
##   metar = col_character()
## )
## See spec(...) for full column specifications.
str(klnk, give.attr=FALSE)
## Classes 'spec_tbl_df', 'tbl_df', 'tbl' and 'data.frame': 10594 obs. of  29 variables:
##  $ station          : chr  "LNK" "LNK" "LNK" "LNK" ...
##  $ valid            : POSIXct, format: "2015-12-31 00:54:00" "2015-12-31 01:54:00" ...
##  $ tmpf             : num  27 26.1 27 27 27 ...
##  $ dwpf             : num  19.9 19.9 19.9 21 19.9 ...
##  $ relh             : num  74.5 77.3 74.5 78 74.5 ...
##  $ drct             : num  300 0 0 280 310 10 0 10 20 0 ...
##  $ sknt             : num  5 0 0 3 5 9 0 3 3 0 ...
##  $ p01i             : chr  "0.00" "0.00" "0.00" "0.00" ...
##  $ alti             : num  30.3 30.3 30.3 30.3 30.3 ...
##  $ mslp             : num  1028 1028 1028 1028 1029 ...
##  $ vsby             : num  10 10 10 10 10 10 10 10 10 10 ...
##  $ gust             : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ skyc1            : chr  "OVC" "OVC" "OVC" "OVC" ...
##  $ skyc2            : chr  NA NA NA NA ...
##  $ skyc3            : chr  NA NA NA NA ...
##  $ skyc4            : logi  NA NA NA NA NA NA ...
##  $ skyl1            : num  2800 2700 2600 2700 2100 2700 2700 2700 2600 2600 ...
##  $ skyl2            : num  NA NA NA NA 2700 NA NA NA NA NA ...
##  $ skyl3            : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ skyl4            : logi  NA NA NA NA NA NA ...
##  $ wxcodes          : chr  NA NA NA NA ...
##  $ ice_accretion_1hr: chr  NA NA NA NA ...
##  $ ice_accretion_3hr: chr  NA NA NA NA ...
##  $ ice_accretion_6hr: chr  NA NA NA NA ...
##  $ peak_wind_gust   : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ peak_wind_drct   : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ peak_wind_time   : POSIXct, format: NA NA ...
##  $ feel             : num  20.4 26.1 27 22.9 20.4 ...
##  $ metar            : chr  "KLNK 310054Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T10281067" "KLNK 310154Z 00000KT 10SM OVC027 M03/M07 A3030 RMK AO2 SLP277 T10331067" "KLNK 310254Z 00000KT 10SM OVC026 M03/M07 A3030 RMK AO2 SLP277 T10281067 51008" "KLNK 310354Z 28003KT 10SM OVC027 M03/M06 A3031 RMK AO2 SLP281 T10281061" ...
# Filter to only data that ends with times ending in 54Z
metarKLNK <- klnk %>%
    filter(str_detect(metar, "54Z"))
dim(metarKLNK)
## [1] 8813   29
# There should be 24*368=8832 records, so there are a handful (19) of missing METAR observations
minDate <- min(metarKLNK$valid)
expDate <- minDate + lubridate::hours(0:(24*368 - 1))

# Observations expected but not recorded
as.POSIXct(setdiff(expDate, metarKLNK$valid), origin="1970-01-01", tz="UTC")
##  [1] "2016-01-19 11:54:00 UTC" "2016-05-06 11:54:00 UTC"
##  [3] "2016-05-06 12:54:00 UTC" "2016-06-17 23:54:00 UTC"
##  [5] "2016-06-18 00:54:00 UTC" "2016-06-18 07:54:00 UTC"
##  [7] "2016-07-02 15:54:00 UTC" "2016-07-13 14:54:00 UTC"
##  [9] "2016-07-13 15:54:00 UTC" "2016-07-13 16:54:00 UTC"
## [11] "2016-07-13 17:54:00 UTC" "2016-07-30 13:54:00 UTC"
## [13] "2016-08-02 07:54:00 UTC" "2016-08-05 07:54:00 UTC"
## [15] "2016-08-29 21:54:00 UTC" "2016-09-15 16:54:00 UTC"
## [17] "2016-09-16 05:54:00 UTC" "2016-11-21 00:54:00 UTC"
## [19] "2016-12-03 08:54:00 UTC"
# Observations recorded but not expected
setdiff(metarKLNK$valid, expDate)
## numeric(0)
# Confirmation of uniqueness
length(unique(metarKLNK$valid)) == length(metarKLNK$valid)
## [1] TRUE
# Extract wind speeds and direction
# The general wind format is dddssGssKT where ddd is the direction (VRB meaning variable), the main ss is the speed, and the Gss is the gust speed (optional and not always displayed)

mtxWind <- metarKLNK %>%
    pull(metar) %>%
    str_match(pattern="(\\d{3}|VRB)(\\d{2})(G\\d{2})?KT")
head(mtxWind)
##      [,1]      [,2]  [,3] [,4]
## [1,] "30005KT" "300" "05" NA  
## [2,] "00000KT" "000" "00" NA  
## [3,] "00000KT" "000" "00" NA  
## [4,] "28003KT" "280" "03" NA  
## [5,] "31005KT" "310" "05" NA  
## [6,] "01009KT" "010" "09" NA
table(mtxWind[, 2], useNA="ifany")
## 
##  000  010  020  030  040  050  060  070  080  090  100  110  120  130  140  150 
##  875  269  199  146  135  121  108   95   88   65  102  158  169  245  241  339 
##  160  170  180  190  200  210  220  230  240  250  260  270  280  290  300  310 
##  463  565  517  413  284  179  142   96   73   80  105   89  121  141  147  225 
##  320  330  340  350  360  VRB <NA> 
##  234  303  352  413  383  114   19
table(mtxWind[, 3], useNA="ifany")
## 
##   00   03   04   05   06   07   08   09   10   11   12   13   14   15   16   17 
##  875  615  704  664  696  651  636  599  536  451  439  371  315  276  235  173 
##   18   19   20   21   22   23   24   25   26   27   28   29   30   31   32   34 
##  156  108   69   62   45   33   23   13   14    9   10    6    6    2    1    1 
## <NA> 
##   19
table(mtxWind[, 4], useNA="ifany")
## 
##  G14  G15  G16  G17  G18  G19  G20  G21  G22  G23  G24  G25  G26  G27  G28  G29 
##    6   11   11   16   30   59   69   87   83  107  101   83   62   61   61   37 
##  G30  G31  G32  G33  G34  G35  G36  G37  G38  G39  G40  G41  G42  G43  G45 <NA> 
##   28   24   18   13   12   14    6    6   10   11    2    3    1    2    2 7777
# Verify that winds not captured are in fact missing from the METAR
metarKLNK[which(is.na(mtxWind[, 2])), "metar"]
## # A tibble: 19 x 1
##    metar                                                                        
##    <chr>                                                                        
##  1 KLNK 291354Z 10SM CLR M01/M03 A2976 RMK AO2 SLP088 T10061028                 
##  2 KLNK 012154Z 10SM CLR 01/M08 A3018 RMK AO2 SLP234 T00061083                  
##  3 KLNK 201754Z 10SM CLR 06/M07 A3041 RMK AO2 SLP308 T00611067 10061 21039 58013
##  4 KLNK 221754Z 10SM CLR 16/03 A2955 RMK AO2 SLP000 T01610033 10161 20106 58006~
##  5 KLNK 221854Z 10SM CLR 17/03 A2954 RMK AO2 SLP000 T01670033 $                 
##  6 KLNK 050254Z 10SM CLR 14/03 A3004 RMK AO2 SLP169 T01390033 53007             
##  7 KLNK 181754Z 10SM OVC075 21/04 A3023 RMK AO2 SLP234 T02110039 10217 20072 58~
##  8 KLNK 011454Z 21/10 A3008 RMK AO2 SLP178 T02110100 51011 $                    
##  9 KLNK 152254Z 10SM CLR 38/11 A2978 RMK AO2 SLP070 T03780106                   
## 10 KLNK 181954Z 10SM SCT045 31/20 A3017 RMK AO2 SLP205 T03060200                
## 11 KLNK 261654Z 10SM CLR 29/18 A3014 RMK AO2 SLP192 T02890183 $                 
## 12 KLNK 261754Z 10SM CLR 31/18 A3012 RMK AO2 SLP188 T03060178 10306 20222       
## 13 KLNK 271754Z 10SM CLR 33/18 A3015 RMK AO2 SLP198 T03330183 10333 20222 55001 
## 14 KLNK 251754Z 10SM CLR 29/17 A3013 RMK AO2 SLP192 T02890167 10289 20194 58010 
## 15 KLNK 261854Z 10SM CLR 30/18 A3006 RMK AO2 SLP166 T03000183                   
## 16 KLNK 211854Z 10SM CLR 27/10 A3007 RMK AO2 SLP174 T02670100                   
## 17 KLNK 201454Z 10SM CLR 24/21 A3008 RMK AO2 SLP178 T02390206 56004 $           
## 18 KLNK 051854Z 10SM CLR 22/07 A3027 RMK AO2 SLP244 T02170067                   
## 19 KLNK 170754Z AUTO 10SM CLR 08/05 A2950 RMK AO2 SLP984 T00830050
metarKLNK <- metarKLNK %>%
    mutate(dirW=mtxWind[, 2], 
           spdW=as.numeric(mtxWind[, 3]), 
           gustW=as.numeric(str_replace(mtxWind[, 4], "G", ""))
           )

# Plot for the wind direction
metarKLNK %>%
    ggplot(aes(x=dirW)) + 
    geom_bar() + 
    labs(title="Lincoln, NE Wind Direction", subtitle="KLNK METAR (2016)", 
         y="# Hourly Observations", x="Wind Direction"
         )

# Plot for the minimum, average, and maximum wind speed by wind direction
# Wind direction 000 is reserved for 0 KT wind, while VRB is reserved for 3-6 KT variable winds
metarKLNK %>%
    filter(!is.na(dirW)) %>%
    group_by(dirW) %>%
    summarize(minWind=min(spdW), meanWind=mean(spdW), maxWind=max(spdW)) %>%
    ggplot(aes(x=dirW)) + 
    geom_point(aes(y=meanWind), color="red", size=2) + 
    geom_errorbar(aes(ymin=minWind, ymax=maxWind)) + 
    labs(title="Lincoln, NE Wind Direction", subtitle="KLNK METAR (2016)", 
         y="Wind Speed [KT]", x="Wind Direction"
         )

# Plot for the wind speed
# Roughly 10% of the time, there is no wind in Lincoln
metarKLNK %>%
    ggplot(aes(x=spdW)) + 
    geom_bar(aes(y=..count../sum(..count..))) + 
    labs(title="Roughly 10% of wind speeds in Lincoln, NE measure 0 Knots", subtitle="KLNK METAR (2016)", 
         y="% Hourly Observations", x="Wind Speed {KT]"
         )
## Warning: Removed 19 rows containing non-finite values (stat_count).

metarKLNK %>% 
    filter(!is.na(dirW), dirW != "VRB", dirW != "000") %>%
    mutate(dirW=as.numeric(dirW)) %>%
    group_by(dirW, spdW) %>%
    summarize(n=n()) %>%
    ggplot(aes(x=spdW, y=dirW)) + 
    geom_point(alpha=0.1, aes(size=n)) + 
    coord_polar(theta="y") + 
    labs(title="Lincoln, NE (2016)", subtitle="Direction vs. Wind Speed", x="Wind Speed [KT]") + 
    scale_y_continuous(limits=c(0, 360), breaks=c(0, 90, 180, 270, 360)) + 
    scale_x_continuous(limits=c(0, 30), breaks=c(0, 5, 10, 15, 20, 25, 30)) + 
    geom_point(aes(x=0, y=0), color="red", size=2)
## Warning: Removed 4 rows containing missing values (geom_point).

Example #13: Extracting Key Elements from METAR

A properly formatted METAR includes the following information in order, though with variable amounts of other information in between.

dddd54Z ddddd[Gdd]KT dSM [M]dd/[M]dd Adddd RMK SLPddd Tdddddddd

  • dddd54Z is the two-digit date and four-digit Zulu time (KLNK METAR are taken at 54 minutes past the hour)
  • ddddd[Gdd]KT is the three-digit wind direction (can be VRB), two digit wind speed in knots, and sometimes the two digit maximum gust in knots
  • dSM is the visibility in statute miles. This is somewhat tricky in that d can be any of 0-10 but can also be 1/4, 1/2, 3/4, 1 1/4, 1 1/2, 1 3/4, 2 1/2
  • [M]dd/[M]dd is the temperature in celsius and dewpoint in celsius. M means negative
  • Adddd is the four-digit altimeter reading
  • RMK notes that the remarks are beginning
  • SLPddd notes the three-digit sea-level pressure
  • Tdddddddd notes the four digit temperature in Celsius and the four digit dewpoint in celsius. If it begins with 1 it is negative. The fourth digit is the decimal. It will always be a Celsius reading that best corresponds to integer degrees of Fahrenheit

Example code includes:

metAll <- metarKLNK %>%
    pull(metar)

# Create a search string for METAR
valMet <- "54Z.*?(VRB|\\d{3})(\\d{2})(G\\d{2})?KT(.*?)(\\d{1,2}SM).*?\\s(M?\\d{2})/(M?\\d{2}).*?(A\\d{4}).*?RMK.*?(SLP\\d{3}).*?(T\\d{8})"

# Find the number of matching elements
str_detect(metAll, pattern=valMet) %>% table()
## .
## FALSE  TRUE 
##    23  8790
# The strings that do not match have errors in the raw data (typically, missing wind speed)
metAll[!str_detect(metAll, pattern=valMet)]
##  [1] "KLNK 291354Z 10SM CLR M01/M03 A2976 RMK AO2 SLP088 T10061028"                   
##  [2] "KLNK 012154Z 10SM CLR 01/M08 A3018 RMK AO2 SLP234 T00061083"                    
##  [3] "KLNK 201754Z 10SM CLR 06/M07 A3041 RMK AO2 SLP308 T00611067 10061 21039 58013"  
##  [4] "KLNK 221654Z 19007KT CLR 15/03 A2956 RMK AO2 SLP006 T01500033 $"                
##  [5] "KLNK 221754Z 10SM CLR 16/03 A2955 RMK AO2 SLP000 T01610033 10161 20106 58006 $" 
##  [6] "KLNK 221854Z 10SM CLR 17/03 A2954 RMK AO2 SLP000 T01670033 $"                   
##  [7] "KLNK 050254Z 10SM CLR 14/03 A3004 RMK AO2 SLP169 T01390033 53007"               
##  [8] "KLNK 181754Z 10SM OVC075 21/04 A3023 RMK AO2 SLP234 T02110039 10217 20072 58011"
##  [9] "KLNK 011454Z 21/10 A3008 RMK AO2 SLP178 T02110100 51011 $"                      
## [10] "KLNK 011554Z 32007KT CLR 22/09 A3008 RMK AO2 SLP179 T02170089 $"                
## [11] "KLNK 152254Z 10SM CLR 38/11 A2978 RMK AO2 SLP070 T03780106"                     
## [12] "KLNK 181954Z 10SM SCT045 31/20 A3017 RMK AO2 SLP205 T03060200"                  
## [13] "KLNK 261554Z 05006KT 27/18 A3013 RMK AO2 SLP190 T02670178 RVRNO $"              
## [14] "KLNK 261654Z 10SM CLR 29/18 A3014 RMK AO2 SLP192 T02890183 $"                   
## [15] "KLNK 261754Z 10SM CLR 31/18 A3012 RMK AO2 SLP188 T03060178 10306 20222"         
## [16] "KLNK 271754Z 10SM CLR 33/18 A3015 RMK AO2 SLP198 T03330183 10333 20222 55001"   
## [17] "KLNK 251754Z 10SM CLR 29/17 A3013 RMK AO2 SLP192 T02890167 10289 20194 58010"   
## [18] "KLNK 261854Z 10SM CLR 30/18 A3006 RMK AO2 SLP166 T03000183"                     
## [19] "KLNK 211854Z 10SM CLR 27/10 A3007 RMK AO2 SLP174 T02670100"                     
## [20] "KLNK 201454Z 10SM CLR 24/21 A3008 RMK AO2 SLP178 T02390206 56004 $"             
## [21] "KLNK 201554Z 15014G19KT CLR 27/22 A3007 RMK AO2 SLP174 T02670217 $"             
## [22] "KLNK 051854Z 10SM CLR 22/07 A3027 RMK AO2 SLP244 T02170067"                     
## [23] "KLNK 170754Z AUTO 10SM CLR 08/05 A2950 RMK AO2 SLP984 T00830050"
# A matrix of string matches can be obtained
mtxParse <- str_match(metAll, pattern=valMet)
head(mtxParse)
##      [,1]                                                                   
## [1,] "54Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T10281067"       
## [2,] "54Z 00000KT 10SM OVC027 M03/M07 A3030 RMK AO2 SLP277 T10331067"       
## [3,] "54Z 00000KT 10SM OVC026 M03/M07 A3030 RMK AO2 SLP277 T10281067"       
## [4,] "54Z 28003KT 10SM OVC027 M03/M06 A3031 RMK AO2 SLP281 T10281061"       
## [5,] "54Z 31005KT 10SM SCT021 OVC027 M03/M07 A3033 RMK AO2 SLP286 T10281067"
## [6,] "54Z AUTO 01009KT 10SM OVC027 M06/M10 A3033 RMK AO2 SLP289 T10611100"  
##      [,2]  [,3] [,4] [,5] [,6]   [,7]  [,8]  [,9]    [,10]    [,11]      
## [1,] "300" "05" NA   " "  "10SM" "M03" "M07" "A3029" "SLP275" "T10281067"
## [2,] "000" "00" NA   " "  "10SM" "M03" "M07" "A3030" "SLP277" "T10331067"
## [3,] "000" "00" NA   " "  "10SM" "M03" "M07" "A3030" "SLP277" "T10281067"
## [4,] "280" "03" NA   " "  "10SM" "M03" "M06" "A3031" "SLP281" "T10281061"
## [5,] "310" "05" NA   " "  "10SM" "M03" "M07" "A3033" "SLP286" "T10281067"
## [6,] "010" "09" NA   " "  "10SM" "M06" "M10" "A3033" "SLP289" "T10611100"
# Create a data frame
dfParse <- data.frame(mtxParse, stringsAsFactors=FALSE)
names(dfParse) <- c("METAR", "WindDir", "WindSpeed", "WindGust", "Dummy", "Visibility", 
                    "TempC", "DewC", "Altimeter", "SLP", "FahrC"
                    )
dfParse <- tibble::as_tibble(dfParse)
str(dfParse)
## Classes 'tbl_df', 'tbl' and 'data.frame':    8813 obs. of  11 variables:
##  $ METAR     : chr  "54Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T10281067" "54Z 00000KT 10SM OVC027 M03/M07 A3030 RMK AO2 SLP277 T10331067" "54Z 00000KT 10SM OVC026 M03/M07 A3030 RMK AO2 SLP277 T10281067" "54Z 28003KT 10SM OVC027 M03/M06 A3031 RMK AO2 SLP281 T10281061" ...
##  $ WindDir   : chr  "300" "000" "000" "280" ...
##  $ WindSpeed : chr  "05" "00" "00" "03" ...
##  $ WindGust  : chr  NA NA NA NA ...
##  $ Dummy     : chr  " " " " " " " " ...
##  $ Visibility: chr  "10SM" "10SM" "10SM" "10SM" ...
##  $ TempC     : chr  "M03" "M03" "M03" "M03" ...
##  $ DewC      : chr  "M07" "M07" "M07" "M06" ...
##  $ Altimeter : chr  "A3029" "A3030" "A3030" "A3031" ...
##  $ SLP       : chr  "SLP275" "SLP277" "SLP277" "SLP281" ...
##  $ FahrC     : chr  "T10281067" "T10331067" "T10281067" "T10281061" ...
# Convert to numeric where appropriate
dfParse <- dfParse %>%
    mutate(WindSpeed = as.integer(WindSpeed), 
           WindGust = as.numeric(WindGust), 
           Visibility = as.numeric(str_replace(Visibility, "SM", "")),
           TempC = as.integer(str_replace(TempC, "M", "-")), 
           DewC = as.integer(str_replace(DewC, "M", "-")), 
           Altimeter = as.integer(str_replace(Altimeter, "A", "")), 
           SLP = as.integer(str_replace(SLP, "SLP", "")), 
           TempF = 32 + 1.8 * as.integer(str_replace(str_sub(FahrC, 2, 5), pattern="^1", "-"))/10, 
           DewF = 32 + 1.8 * as.integer(str_replace(str_sub(FahrC, 6, 9), pattern="^1", "-"))/10
           )
## Warning: NAs introduced by coercion
# Investigate the data
set.seed(2003211416)
str(dfParse)
## Classes 'tbl_df', 'tbl' and 'data.frame':    8813 obs. of  13 variables:
##  $ METAR     : chr  "54Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T10281067" "54Z 00000KT 10SM OVC027 M03/M07 A3030 RMK AO2 SLP277 T10331067" "54Z 00000KT 10SM OVC026 M03/M07 A3030 RMK AO2 SLP277 T10281067" "54Z 28003KT 10SM OVC027 M03/M06 A3031 RMK AO2 SLP281 T10281061" ...
##  $ WindDir   : chr  "300" "000" "000" "280" ...
##  $ WindSpeed : int  5 0 0 3 5 9 0 3 0 0 ...
##  $ WindGust  : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ Dummy     : chr  " " " " " " " " ...
##  $ Visibility: num  10 10 10 10 10 10 10 10 10 10 ...
##  $ TempC     : int  -3 -3 -3 -3 -3 -6 -7 -8 -10 -9 ...
##  $ DewC      : int  -7 -7 -7 -6 -7 -10 -11 -11 -12 -12 ...
##  $ Altimeter : int  3029 3030 3030 3031 3033 3033 3034 3034 3034 3035 ...
##  $ SLP       : int  275 277 277 281 286 289 290 291 295 295 ...
##  $ FahrC     : chr  "T10281067" "T10331067" "T10281067" "T10281061" ...
##  $ TempF     : num  27 26.1 27 27 27 ...
##  $ DewF      : num  19.9 19.9 19.9 21 19.9 ...
head(dfParse)
## # A tibble: 6 x 13
##   METAR WindDir WindSpeed WindGust Dummy Visibility TempC  DewC Altimeter   SLP
##   <chr> <chr>       <int>    <dbl> <chr>      <dbl> <int> <int>     <int> <int>
## 1 54Z ~ 300             5       NA " "           10    -3    -7      3029   275
## 2 54Z ~ 000             0       NA " "           10    -3    -7      3030   277
## 3 54Z ~ 000             0       NA " "           10    -3    -7      3030   277
## 4 54Z ~ 280             3       NA " "           10    -3    -6      3031   281
## 5 54Z ~ 310             5       NA " "           10    -3    -7      3033   286
## 6 54Z ~ 010             9       NA " "           10    -6   -10      3033   289
## # ... with 3 more variables: FahrC <chr>, TempF <dbl>, DewF <dbl>
tail(dfParse)
## # A tibble: 6 x 13
##   METAR WindDir WindSpeed WindGust Dummy Visibility TempC  DewC Altimeter   SLP
##   <chr> <chr>       <int>    <dbl> <chr>      <dbl> <int> <int>     <int> <int>
## 1 54Z ~ 160             6       NA " "           10     2    -4      2993   147
## 2 54Z ~ VRB             4       NA " "           10     4    -4      2990   139
## 3 54Z ~ 130             7       NA " "           10     4    -7      2989   134
## 4 54Z ~ 110             7       NA " "           10     4    -7      2988   130
## 5 54Z ~ 100            10       NA " "           10     3    -5      2986   125
## 6 54Z ~ 100            10       NA " "           10     3    -6      2986   123
## # ... with 3 more variables: FahrC <chr>, TempF <dbl>, DewF <dbl>
dfParse %>% 
    sample_n(20)
## # A tibble: 20 x 13
##    METAR WindDir WindSpeed WindGust Dummy Visibility TempC  DewC Altimeter   SLP
##    <chr> <chr>       <int>    <dbl> <chr>      <dbl> <int> <int>     <int> <int>
##  1 54Z ~ 360             7       NA " "           10    21    15      2995   130
##  2 54Z ~ 210            13       NA " "           10    31     4      2988   108
##  3 54Z ~ 340            11       NA " "           10    -7   -13      3025   259
##  4 54Z ~ 240             9       NA " "           10    13     8      2961    18
##  5 54Z ~ 310             6       NA " "           10    -7   -19      3044   329
##  6 54Z ~ 040             7       NA " "           10    26    14      3010   179
##  7 54Z ~ 170            17       NA " "           10    31    22      2983    90
##  8 54Z ~ 160             7       NA " "           10    -1    -4      3002   182
##  9 54Z ~ 310             9       NA " "           10    13     9      3021   226
## 10 54Z ~ 110            10       NA " "           10    16    16      2990   116
## 11 54Z ~ 140             5       NA " "           10    19    18      3013   199
## 12 54Z ~ 110             6       NA " "           10    20     4      3012   197
## 13 54Z ~ 220             4       NA " "           10     3    -4      3042   311
## 14 54Z ~ 350             3       NA " "            8     9     8      3011   194
## 15 54Z ~ 080             8       NA " "           10   -12   -18      3046   340
## 16 54Z ~ 320             7       NA " "           10    16    11      2998   144
## 17 54Z ~ 330             6       NA " "           10     9     5      3016   214
## 18 54Z ~ 340            13       NA " "            3     0    -1      2979    99
## 19 54Z ~ 190             5       NA " "           10     3    -4      3024   251
## 20 54Z ~ 320            18       NA " "           10    24    12      2999   147
## # ... with 3 more variables: FahrC <chr>, TempF <dbl>, DewF <dbl>
# Check for NA values
colSums(is.na(dfParse))
##      METAR    WindDir  WindSpeed   WindGust      Dummy Visibility      TempC 
##         23         23         23       8813         23         23         23 
##       DewC  Altimeter        SLP      FahrC      TempF       DewF 
##         23         23         23         23         23         23
# Plot of counts by key metric
keyMetric <- c("WindDir", "WindSpeed", "WindGust", "Visibility", "TempC", 
               "DewC", "Altimeter", "SLP", "TempF", "DewF"
               )

for (x in keyMetric) {
    p <- dfParse %>%
        group_by_at(vars(all_of(x))) %>%
        summarize(n=n()) %>%
        ggplot(aes_string(x=x, y="n")) + 
        geom_col() + 
        labs(title=x, y="Count")
    print(p)
}

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf
## Warning in min(diff(sort(x))): no non-missing arguments to min; returning Inf
## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

# There are three obvious issues
# Visibility is not correctly picked up when there is a / such as 1/2 SM
# Wind gusts are never picked up
# Sea Level Pressure is missing a digit

# Correct for visibility
# Areas that have \\d \\d/\\dSM
sm1 <- which(str_detect(metAll, pattern=" \\d/\\dSM"))
sm2 <- which(str_detect(metAll, pattern=" \\d \\d/\\dSM"))

valSM1 <- str_match(metAll, pattern="\\d/\\dSM")[sm1]
valSM1 <- str_replace(valSM1, "SM", "")
valSM1 <- as.integer(str_sub(valSM1, 1, 1)) / as.integer(str_sub(valSM1, 3, 3))

valSM2 <- str_match(metAll, pattern=" \\d \\d/\\dSM")[sm2]
valSM2 <- as.integer(str_sub(valSM2, 2, 2))

dfParse[sm1, "Visibility"] <- valSM1
dfParse[sm2, "Visibility"] <- dfParse[sm2, "Visibility"] + valSM2

dfParse %>% 
    count(Visibility)
## # A tibble: 18 x 2
##    Visibility     n
##         <dbl> <int>
##  1       0.25    20
##  2       0.5     16
##  3       0.75    15
##  4       1       19
##  5       1.25    13
##  6       1.5     17
##  7       1.75    21
##  8       2       50
##  9       2.5     38
## 10       3       70
## 11       4      108
## 12       5      108
## 13       6      146
## 14       7      189
## 15       8      221
## 16       9      290
## 17      10     7449
## 18      NA       23
# Correct for wind gusts
gustCheck <- which(str_detect(metAll, pattern="\\d{5}G\\d{2}KT"))
valGust <- str_match(metAll, pattern="\\d{5}G\\d{2}KT")[gustCheck]
valGust <- as.integer(str_sub(valGust, 7, 8))

dfParse[gustCheck, "WindGust"] <- valGust

dfParse %>% 
    count(WindGust) %>% 
    as.data.frame
##    WindGust    n
## 1        14    4
## 2        15   11
## 3        16   11
## 4        17   16
## 5        18   29
## 6        19   59
## 7        20   69
## 8        21   87
## 9        22   83
## 10       23  107
## 11       24  101
## 12       25   83
## 13       26   62
## 14       27   61
## 15       28   61
## 16       29   37
## 17       30   28
## 18       31   24
## 19       32   18
## 20       33   13
## 21       34   12
## 22       35   14
## 23       36    6
## 24       37    6
## 25       38   10
## 26       39   11
## 27       40    2
## 28       41    3
## 29       42    1
## 30       43    2
## 31       45    2
## 32       NA 7780
# Correct for SLP
dfParse <- dfParse %>%
    mutate(modSLP=ifelse(dfParse$SLP < 500, 1000 + dfParse$SLP/10, 900 + dfParse$SLP/10))

dfParse %>%
    group_by(SLP, modSLP) %>%
    summarize(n=n()) %>%
    ggplot(aes(x=SLP, y=modSLP, size=n)) + 
    geom_point(alpha=0.3)
## Warning: Removed 1 rows containing missing values (geom_point).

# Check updated plots
keyMetric <- c("WindGust", "Visibility", "modSLP")
for (x in keyMetric) {
    p <- dfParse %>%
        group_by_at(vars(all_of(x))) %>%
        summarize(n=n()) %>%
        ggplot(aes_string(x=x, y="n")) + 
        geom_col() + 
        labs(title=x, y="Count")
    print(p)
}
## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

Example #14: Relationships Between METAR Variables

Many of the METAR variables are correlated/associated to one another.

Example code includes:

# Define key numeric variables
coreNum <- c("TempC", "TempF", "DewC", "DewF", "Altimeter", "modSLP", "WindSpeed", "Visibility")

# Add the date back to the file (should edit the above instead)
dfParse <- dfParse %>%
    mutate(month=lubridate::month(metarKLNK$valid))
str(dfParse)
## Classes 'tbl_df', 'tbl' and 'data.frame':    8813 obs. of  15 variables:
##  $ METAR     : chr  "54Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T10281067" "54Z 00000KT 10SM OVC027 M03/M07 A3030 RMK AO2 SLP277 T10331067" "54Z 00000KT 10SM OVC026 M03/M07 A3030 RMK AO2 SLP277 T10281067" "54Z 28003KT 10SM OVC027 M03/M06 A3031 RMK AO2 SLP281 T10281061" ...
##  $ WindDir   : chr  "300" "000" "000" "280" ...
##  $ WindSpeed : int  5 0 0 3 5 9 0 3 0 0 ...
##  $ WindGust  : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ Dummy     : chr  " " " " " " " " ...
##  $ Visibility: num  10 10 10 10 10 10 10 10 10 10 ...
##  $ TempC     : int  -3 -3 -3 -3 -3 -6 -7 -8 -10 -9 ...
##  $ DewC      : int  -7 -7 -7 -6 -7 -10 -11 -11 -12 -12 ...
##  $ Altimeter : int  3029 3030 3030 3031 3033 3033 3034 3034 3034 3035 ...
##  $ SLP       : int  275 277 277 281 286 289 290 291 295 295 ...
##  $ FahrC     : chr  "T10281067" "T10331067" "T10281067" "T10281061" ...
##  $ TempF     : num  27 26.1 27 27 27 ...
##  $ DewF      : num  19.9 19.9 19.9 21 19.9 ...
##  $ modSLP    : num  1028 1028 1028 1028 1029 ...
##  $ month     : num  12 12 12 12 12 12 12 12 12 12 ...
# Keep only complete cases and find correlations
mtxCorr <- dfParse %>%
    mutate(month=lubridate::month(metarKLNK$valid)) %>%
    select_at(vars(all_of(coreNum))) %>%
    filter(complete.cases(.)) %>%
    cor()

# Print the correlations and show a heatmap
mtxCorr %>%
    round(2)
##            TempC TempF  DewC  DewF Altimeter modSLP WindSpeed Visibility
## TempC       1.00  1.00  0.91  0.91     -0.38  -0.48      0.13       0.19
## TempF       1.00  1.00  0.91  0.91     -0.38  -0.48      0.13       0.19
## DewC        0.91  0.91  1.00  1.00     -0.38  -0.48     -0.01       0.07
## DewF        0.91  0.91  1.00  1.00     -0.38  -0.48     -0.01       0.07
## Altimeter  -0.38 -0.38 -0.38 -0.38      1.00   0.99     -0.26       0.07
## modSLP     -0.48 -0.48 -0.48 -0.48      0.99   1.00     -0.25       0.04
## WindSpeed   0.13  0.13 -0.01 -0.01     -0.26  -0.25      1.00      -0.01
## Visibility  0.19  0.19  0.07  0.07      0.07   0.04     -0.01       1.00
corrplot::corrplot(mtxCorr, method="color", title="Lincoln, NE Hourly Weather Correlations (2016)")

# Create a function for plotting two variables against each other
plotNumCor <- function(var1, var2, title=NULL) {
    if (is.null(title)) 
        { title <- paste0("Lincoln, NE (2016) Hourly Correlations of ", var1, " and ", var2) }
    p <- dfParse %>%
        group_by_at(vars(all_of(c(var1, var2)))) %>%
        summarize(n=n()) %>%
        ggplot(aes_string(x=var1, y=var2)) + 
        geom_point(alpha=0.5, aes_string(size="n")) + 
        geom_smooth(method="lm", aes_string(weight="n")) + 
        labs(x=var1, y=var2, title=title)
    print(p)
}

# The three linear or almost linear relationships
plotNumCor("TempC", "TempF")
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).

plotNumCor("DewC", "DewF")
## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

plotNumCor("Altimeter", "modSLP")
## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

# Strongly and positively related
plotNumCor("TempF", "DewF")
## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

# Moderately negatively correlated
plotNumCor("TempF", "Altimeter")
## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

plotNumCor("TempF", "modSLP")
## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

plotNumCor("Altimeter", "WindSpeed")
## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

# Predict modSLP from Altimeter
lmSLP1 <- lm(modSLP ~ Altimeter, data=dfParse)
lmSLP2 <- lm(modSLP ~ Altimeter + TempF, data=dfParse)
summary(lmSLP1)
## 
## Call:
## lm(formula = modSLP ~ Altimeter, data = dfParse)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.8890 -0.8172 -0.1578  0.7610  2.7890 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -6.233e+01  1.406e+00  -44.34   <2e-16 ***
## Altimeter    3.594e-01  4.683e-04  767.38   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9691 on 8788 degrees of freedom
##   (23 observations deleted due to missingness)
## Multiple R-squared:  0.9853, Adjusted R-squared:  0.9853 
## F-statistic: 5.889e+05 on 1 and 8788 DF,  p-value: < 2.2e-16
summary(lmSLP2)
## 
## Call:
## lm(formula = modSLP ~ Altimeter + TempF, data = dfParse)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.24130 -0.26737  0.00686  0.25214  1.23326 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -1.002e+01  5.615e-01  -17.84   <2e-16 ***
## Altimeter    3.428e-01  1.857e-04 1845.63   <2e-16 ***
## TempF       -4.559e-02  1.921e-04 -237.31   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3561 on 8787 degrees of freedom
##   (23 observations deleted due to missingness)
## Multiple R-squared:  0.998,  Adjusted R-squared:  0.998 
## F-statistic: 2.209e+06 on 2 and 8787 DF,  p-value: < 2.2e-16
# Plot predictions vs. actual (model 1)
dfParse %>%
    filter(!is.na(modSLP)) %>%
    mutate(pred1=predict(lmSLP1)) %>%
    count(modSLP, pred1) %>%
    ggplot(aes(x=modSLP, y=pred1)) + 
    geom_point(alpha=0.25, aes(size=n)) + 
    geom_smooth(method="lm", aes(weight=n)) + 
    labs(title="Predicted vs. Actual Sea Level Pressure - Altitude Only as Predictor", 
         subtitle="Lincoln, NE (2016) Hourly METAR", x="Sea Level Pressure", y="Predicted"
         )

# Plot predictions vs. actual (model 2)
dfParse %>%
    filter(!is.na(modSLP)) %>%
    mutate(pred2=predict(lmSLP2)) %>%
    count(modSLP, pred2) %>%
    ggplot(aes(x=modSLP, y=pred2)) + 
    geom_point(alpha=0.25, aes(size=n)) + 
    geom_smooth(method="lm", aes(weight=n)) + 
    labs(title="Predicted vs. Actual Sea Level Pressure - Altitude and Temperature as Predictor", 
         subtitle="Lincoln, NE (2016) Hourly METAR", x="Sea Level Pressure", y="Predicted"
         )

Example #15: Extracting Cloud Data from METAR

Cloud data is also included in the METAR, with the type of clouds being described as:

  • CLR - there are no clouds below 12,000 feet
  • VVddd - there is a vertical visibility of ddd hundred feet (cannot tell where the clouds are above that)
  • FEWddd - there are clouds with bases at ddd feet, and they obscure 25% or less of the sky
  • SCTddd - there are clouds with bases at ddd feet, and they obscure 25%-50% of the sky
  • BKNddd - there are clouds with bases at ddd feet, and they obscure 50%-99% of the sky
  • OVCddd - there is a full overcast with base at ddd feet

The ceiling is considered the lowest height that is measured as any of OVC, BKN, or VV.

Example code includes:

# Extract the CLR records
mtxCLR <- str_extract_all(metarKLNK$metar, pattern=" CLR ", simplify=TRUE)
if (dim(mtxCLR)[[2]] != 1) { stop("Extracted 2+ CLR from some METAR; investigate") }
isCLR <- ifelse(mtxCLR[, 1] == "", 0, 1)

# Extract the VV records
mtxVV <- str_extract_all(metarKLNK$metar, pattern="VV(\\d{3})", simplify=TRUE)
if (dim(mtxVV)[[2]] != 1) { stop("Extracted 2+ VV from some METAR; investigate") }
isVV <- ifelse(mtxVV[, 1] == "", 0, 1)
htVV <- ifelse(mtxVV[, 1] == "", NA, as.integer(str_replace(mtxVV[, 1], "VV", ""))*100)

# Extract the FEW records
mtxFEW <- str_extract_all(metarKLNK$metar, pattern="FEW(\\d{3})", simplify=TRUE)
numFEW <- apply(mtxFEW, 1, FUN=function(x) { sum((x!=""))} )

# Extract the SCT records
mtxSCT <- str_extract_all(metarKLNK$metar, pattern="SCT(\\d{3})", simplify=TRUE)
numSCT <- apply(mtxSCT, 1, FUN=function(x) { sum((x!=""))} )

# Extract the BKN records
mtxBKN <- str_extract_all(metarKLNK$metar, pattern="BKN(\\d{3})", simplify=TRUE)
numBKN <- apply(mtxBKN, 1, FUN=function(x) { sum((x!=""))} )

# Extract the OVC records
mtxOVC <- str_extract_all(metarKLNK$metar, pattern="OVC(\\d{3})", simplify=TRUE)
numOVC <- apply(mtxOVC, 1, FUN=function(x) { sum((x!=""))} )

# Summarize as a data frame
tblClouds <- tibble::tibble(isCLR=isCLR, isVV=isVV, htVV=htVV, numFEW=numFEW, 
                            numSCT=numSCT, numBKN=numBKN, numOVC=numOVC
                            )

# Get the counts
# As expected, if isCLR then nothing else, and if isVV then nothing else
tblClouds %>% 
    count(isCLR, isVV, numFEW, numSCT, numBKN, numOVC) %>%
    as.data.frame()
##    isCLR isVV numFEW numSCT numBKN numOVC    n
## 1      0    0      0      0      0      0    6
## 2      0    0      0      0      0      1 1389
## 3      0    0      0      0      1      0  307
## 4      0    0      0      0      1      1  250
## 5      0    0      0      0      2      0   50
## 6      0    0      0      0      2      1   45
## 7      0    0      0      0      3      0    8
## 8      0    0      0      1      0      0  230
## 9      0    0      0      1      0      1   73
## 10     0    0      0      1      1      0   52
## 11     0    0      0      1      1      1   42
## 12     0    0      0      1      2      0   17
## 13     0    0      0      2      0      0   16
## 14     0    0      0      2      0      1    9
## 15     0    0      0      2      1      0    6
## 16     0    0      1      0      0      0  380
## 17     0    0      1      0      0      1   90
## 18     0    0      1      0      1      0   46
## 19     0    0      1      0      1      1   62
## 20     0    0      1      0      2      0    9
## 21     0    0      1      1      0      0   39
## 22     0    0      1      1      0      1   24
## 23     0    0      1      1      1      0   28
## 24     0    0      1      2      0      0   10
## 25     0    0      2      0      0      0   24
## 26     0    0      2      0      0      1    6
## 27     0    0      2      0      1      0    5
## 28     0    0      2      1      0      0    3
## 29     0    0      3      0      0      0    2
## 30     0    1      0      0      0      0   33
## 31     1    0      0      0      0      0 5552
# Investigate the problem data
metarKLNK$metar[rowSums(tblClouds, na.rm=TRUE)==0]
## [1] "KLNK 301854Z 17011KT 10SM 30/18 A2993 RMK AO2 SLP124 T03000178 $"                           
## [2] "KLNK 011454Z 21/10 A3008 RMK AO2 SLP178 T02110100 51011 $"                                  
## [3] "KLNK 261554Z 05006KT 27/18 A3013 RMK AO2 SLP190 T02670178 RVRNO $"                          
## [4] "KLNK 161554Z 09009KT 10SM 26/19 A3007 RMK AO2 SLP171 T02610194 $"                           
## [5] "KLNK 211954Z 25008KT 210V270 10SM 27/10 A3005 RMK AO2 SLP170 T02720100 $"                   
## [6] "KLNK 050854Z 00000KT 1/2SM R36/3000VP6000FT FG 09/09 A2978 RMK AO2 SLP081 T00940094 51008 $"
# Get the counts of most obscuration
tblClouds %>%
    filter(rowSums(., na.rm=TRUE) > 0) %>%
    mutate(wType=factor(case_when(isCLR==1 ~ "CLR", isVV==1 ~ "VV", numOVC > 0 ~ "OVC", 
                                  numBKN > 0 ~ "BKN", numSCT > 0 ~ "SCT", numFEW > 0 ~ "FEW", 
                                  TRUE ~ "Error"
                                  ), levels=c("VV", "OVC", "BKN", "SCT", "FEW", "CLR", "Error")
                  )
           ) %>%
    ggplot(aes(x=wType, y=..count../sum(..count..))) + 
    geom_bar() + 
    labs(title="Highest Obscuration by Cloud - Lincoln, NE (2016)", x="Cloud Type", 
         y="Proportion of Hourly Measurements"
         )

# Integrate the clouds data
mtxCloud <- cbind(mtxVV, mtxOVC, mtxBKN, mtxSCT, mtxFEW, mtxCLR)

# Cycle through to find levels of a given type
ckClouds <- function(cloudType) {
    isKey <- which(apply(mtxCloud, 2, FUN=function(x) {sum(str_detect(x, cloudType))}) > 0)
    as.integer(str_replace(mtxCloud[, min(isKey)], cloudType, "")) * 100
}
lowOVC <- ckClouds("OVC")
lowVV <- ckClouds("VV")
lowBKN <- ckClouds("BKN")
lowSCT <- ckClouds("SCT")
lowFEW <- ckClouds("FEW")

# Integrate the lowest cloud type by level
lowCloud <- tibble::tibble(lowVV, lowOVC, lowBKN, lowSCT, lowFEW)
lowCloud
## # A tibble: 8,813 x 5
##    lowVV lowOVC lowBKN lowSCT lowFEW
##    <dbl>  <dbl>  <dbl>  <dbl>  <dbl>
##  1    NA   2800     NA     NA     NA
##  2    NA   2700     NA     NA     NA
##  3    NA   2600     NA     NA     NA
##  4    NA   2700     NA     NA     NA
##  5    NA   2700     NA   2100     NA
##  6    NA   2700     NA     NA     NA
##  7    NA   2700     NA     NA     NA
##  8    NA   2700     NA     NA     NA
##  9    NA     NA     NA     NA   2600
## 10    NA     NA     NA     NA     NA
## # ... with 8,803 more rows
# Get the lowest cloud level
minCloud <- lowCloud
minCloud[is.na(minCloud)] <- 999999
minCloudLevel <- apply(minCloud, 1, FUN=min)
minCeilingLevel <- apply(minCloud[, c("lowVV", "lowOVC", "lowBKN")], 1, FUN=min)

noCloudPct <- mean(minCloudLevel == 999999)
noCeilingPct <- mean(minCeilingLevel == 999999)

# Plot the minimum cloud level (where it exists)
data.frame(minCloudLevel, minCeilingLevel) %>%
    filter(minCloudLevel != 999999) %>%
    ggplot(aes(x=minCloudLevel)) + 
    geom_bar(aes(y=..count../sum(..count..))) + 
    geom_text(aes(x=2500, y=0.04, 
                  label=paste0(round(100*noCloudPct), "% of obs. have no clouds")
                  )
              ) + 
    labs(x="Height [ft]", y="Proportion", title="Minimum Cloud Height (when some clouds exist)", 
         subtitle="Lincoln, NE (2016)"
         )

# Plot the minimum ceiling level (where it exists)
data.frame(minCloudLevel, minCeilingLevel) %>%
    filter(minCeilingLevel != 999999) %>%
    ggplot(aes(x=minCeilingLevel)) + 
    geom_bar(aes(y=..count../sum(..count..))) + 
    geom_text(aes(x=2500, y=0.04, 
                  label=paste0(round(100*noCeilingPct), "% of obs. have no ceiling")
                  )
              ) + 
    labs(x="Height [ft]", y="Proportion", title="Minimum Ceiling Height (when a ceiling exists)", 
         subtitle="Lincoln, NE (2016)"
         )

Example #16: Plotting by factor variables

The month of the year is an interesting data point for plotting against.

Example code includes:

# Integrate the cloud data and convert month to a factor
dfFull <- cbind(dfParse, tblClouds, lowCloud) %>%
    mutate(wType=factor(case_when(isCLR==1 ~ "CLR", isVV==1 ~ "VV", numOVC > 0 ~ "OVC", 
                                  numBKN > 0 ~ "BKN", numSCT > 0 ~ "SCT", numFEW > 0 ~ "FEW", 
                                  TRUE ~ "Error"
                                  ), levels=c("VV", "OVC", "BKN", "SCT", "FEW", "CLR", "Error")
                  ), 
           month=factor(month, levels=1:12, labels=month.abb)
           )
dfFull <- tibble::as_tibble(dfFull)
str(dfFull)
## Classes 'tbl_df', 'tbl' and 'data.frame':    8813 obs. of  28 variables:
##  $ METAR     : chr  "54Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T10281067" "54Z 00000KT 10SM OVC027 M03/M07 A3030 RMK AO2 SLP277 T10331067" "54Z 00000KT 10SM OVC026 M03/M07 A3030 RMK AO2 SLP277 T10281067" "54Z 28003KT 10SM OVC027 M03/M06 A3031 RMK AO2 SLP281 T10281061" ...
##  $ WindDir   : chr  "300" "000" "000" "280" ...
##  $ WindSpeed : int  5 0 0 3 5 9 0 3 0 0 ...
##  $ WindGust  : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ Dummy     : chr  " " " " " " " " ...
##  $ Visibility: num  10 10 10 10 10 10 10 10 10 10 ...
##  $ TempC     : int  -3 -3 -3 -3 -3 -6 -7 -8 -10 -9 ...
##  $ DewC      : int  -7 -7 -7 -6 -7 -10 -11 -11 -12 -12 ...
##  $ Altimeter : int  3029 3030 3030 3031 3033 3033 3034 3034 3034 3035 ...
##  $ SLP       : int  275 277 277 281 286 289 290 291 295 295 ...
##  $ FahrC     : chr  "T10281067" "T10331067" "T10281067" "T10281061" ...
##  $ TempF     : num  27 26.1 27 27 27 ...
##  $ DewF      : num  19.9 19.9 19.9 21 19.9 ...
##  $ modSLP    : num  1028 1028 1028 1028 1029 ...
##  $ month     : Factor w/ 12 levels "Jan","Feb","Mar",..: 12 12 12 12 12 12 12 12 12 12 ...
##  $ isCLR     : num  0 0 0 0 0 0 0 0 0 1 ...
##  $ isVV      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ htVV      : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ numFEW    : int  0 0 0 0 0 0 0 0 1 0 ...
##  $ numSCT    : int  0 0 0 0 1 0 0 0 0 0 ...
##  $ numBKN    : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ numOVC    : int  1 1 1 1 1 1 1 1 0 0 ...
##  $ lowVV     : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ lowOVC    : num  2800 2700 2600 2700 2700 2700 2700 2700 NA NA ...
##  $ lowBKN    : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ lowSCT    : num  NA NA NA NA 2100 NA NA NA NA NA ...
##  $ lowFEW    : num  NA NA NA NA NA NA NA NA 2600 NA ...
##  $ wType     : Factor w/ 7 levels "VV","OVC","BKN",..: 2 2 2 2 2 2 2 2 5 6 ...
# Run the boxplot of a factor against a numeric variable
plotFactorNumeric <- function(fctVar, numVar, title=NULL) {
    if (is.null(title)) { title <- paste0("Lincoln, NE (2016) Hourly Weather - ", numVar, " vs. ", fctVar) }
    p <- dfFull %>%
        filter(!is.na(get(fctVar)), !is.na(get(numVar))) %>%
        ggplot(aes_string(x=fctVar, y=numVar)) + 
        geom_boxplot(fill="lightblue") + 
        labs(title=title)
    print(p)
}

# Run for all of the key variables against wind speed and cloud type
keyVar <- c("WindSpeed", "Visibility", "Altimeter", "TempF", "DewF")
for (var in keyVar) { plotFactorNumeric("month", var) }

for (var in keyVar) { plotFactorNumeric("wType", var) }

# Create stacked bars for cloud type by month
dfFull %>%
    filter(!is.na(wType), wType!="Error") %>%
    ggplot(aes(x=month, fill=wType)) + 
    geom_bar(position="fill") + 
    labs(title="Lincoln, NE (2016)", x="", y="Proportion of Month")

Example #17: Functional Form - METAR download and initial wind processing

Example 12 can be converted to functional form so that the process can be applied to other reporting stations and time periods.

Example code includes:

# Function to make an initial read of the data, filter to METAR records, and check date-times
readMETAR <- function(fileName="./RInputFiles/metar_klnk_2016.txt", timeZ="54Z",
                      expMin=as.POSIXct("2015-12-31 00:54:00", tz="UTC"), expDays=365
                      ) {

    # Load METAR data
    initRead <- readr::read_csv(fileName, na=c("", "NA", "M"))
    str(initRead, give.attr=FALSE)

    # Filter to only data that ends with times ending in 54Z
    filterRead <- initRead %>%
        filter(str_detect(metar, timeZ))
    dim(filterRead)

    # Check that the dates and times included are as expected
    expDate <- expMin + lubridate::hours(0:(24*expDays - 1))
    
    # Observations expected but not recorded
    cat("\n*** OBSERVATIONS EXPECTED BUT NOT RECORDED ***\n")
    print(as.POSIXct(setdiff(expDate, filterRead$valid), origin="1970-01-01", tz="UTC"))

    # Observations recorded but not expected
    cat("\n*** OBSERVATIONS RECORDED BUT NOT EXPECTED ***\n")
    print(as.POSIXct(setdiff(filterRead$valid, expDate), origin="1970-01-01", tz="UTC"))

    # Confirmation of uniqueness
    cat("\n*** Are the extracted records unique? ***\n")
    print(length(unique(filterRead$valid)) == length(filterRead$valid))
    cat("\n")
    
    # Return the dataset as a tibble
    tibble::as_tibble(filterRead)
}
funcMETAR <- readMETAR(expDays=368)
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   station = col_character(),
##   valid = col_datetime(format = ""),
##   p01i = col_character(),
##   skyc1 = col_character(),
##   skyc2 = col_character(),
##   skyc3 = col_character(),
##   skyc4 = col_logical(),
##   skyl4 = col_logical(),
##   wxcodes = col_character(),
##   ice_accretion_1hr = col_character(),
##   ice_accretion_3hr = col_character(),
##   ice_accretion_6hr = col_character(),
##   peak_wind_time = col_datetime(format = ""),
##   metar = col_character()
## )
## See spec(...) for full column specifications.
## Classes 'spec_tbl_df', 'tbl_df', 'tbl' and 'data.frame': 10594 obs. of  29 variables:
##  $ station          : chr  "LNK" "LNK" "LNK" "LNK" ...
##  $ valid            : POSIXct, format: "2015-12-31 00:54:00" "2015-12-31 01:54:00" ...
##  $ tmpf             : num  27 26.1 27 27 27 ...
##  $ dwpf             : num  19.9 19.9 19.9 21 19.9 ...
##  $ relh             : num  74.5 77.3 74.5 78 74.5 ...
##  $ drct             : num  300 0 0 280 310 10 0 10 20 0 ...
##  $ sknt             : num  5 0 0 3 5 9 0 3 3 0 ...
##  $ p01i             : chr  "0.00" "0.00" "0.00" "0.00" ...
##  $ alti             : num  30.3 30.3 30.3 30.3 30.3 ...
##  $ mslp             : num  1028 1028 1028 1028 1029 ...
##  $ vsby             : num  10 10 10 10 10 10 10 10 10 10 ...
##  $ gust             : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ skyc1            : chr  "OVC" "OVC" "OVC" "OVC" ...
##  $ skyc2            : chr  NA NA NA NA ...
##  $ skyc3            : chr  NA NA NA NA ...
##  $ skyc4            : logi  NA NA NA NA NA NA ...
##  $ skyl1            : num  2800 2700 2600 2700 2100 2700 2700 2700 2600 2600 ...
##  $ skyl2            : num  NA NA NA NA 2700 NA NA NA NA NA ...
##  $ skyl3            : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ skyl4            : logi  NA NA NA NA NA NA ...
##  $ wxcodes          : chr  NA NA NA NA ...
##  $ ice_accretion_1hr: chr  NA NA NA NA ...
##  $ ice_accretion_3hr: chr  NA NA NA NA ...
##  $ ice_accretion_6hr: chr  NA NA NA NA ...
##  $ peak_wind_gust   : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ peak_wind_drct   : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ peak_wind_time   : POSIXct, format: NA NA ...
##  $ feel             : num  20.4 26.1 27 22.9 20.4 ...
##  $ metar            : chr  "KLNK 310054Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T10281067" "KLNK 310154Z 00000KT 10SM OVC027 M03/M07 A3030 RMK AO2 SLP277 T10331067" "KLNK 310254Z 00000KT 10SM OVC026 M03/M07 A3030 RMK AO2 SLP277 T10281067 51008" "KLNK 310354Z 28003KT 10SM OVC027 M03/M06 A3031 RMK AO2 SLP281 T10281061" ...
## 
## *** OBSERVATIONS EXPECTED BUT NOT RECORDED ***
##  [1] "2016-01-19 11:54:00 UTC" "2016-05-06 11:54:00 UTC"
##  [3] "2016-05-06 12:54:00 UTC" "2016-06-17 23:54:00 UTC"
##  [5] "2016-06-18 00:54:00 UTC" "2016-06-18 07:54:00 UTC"
##  [7] "2016-07-02 15:54:00 UTC" "2016-07-13 14:54:00 UTC"
##  [9] "2016-07-13 15:54:00 UTC" "2016-07-13 16:54:00 UTC"
## [11] "2016-07-13 17:54:00 UTC" "2016-07-30 13:54:00 UTC"
## [13] "2016-08-02 07:54:00 UTC" "2016-08-05 07:54:00 UTC"
## [15] "2016-08-29 21:54:00 UTC" "2016-09-15 16:54:00 UTC"
## [17] "2016-09-16 05:54:00 UTC" "2016-11-21 00:54:00 UTC"
## [19] "2016-12-03 08:54:00 UTC"
## 
## *** OBSERVATIONS RECORDED BUT NOT EXPECTED ***
## POSIXct of length 0
## 
## *** Are the extracted records unique? ***
## [1] TRUE
funcMETAR
## # A tibble: 8,813 x 29
##    station valid                tmpf  dwpf  relh  drct  sknt p01i   alti  mslp
##    <chr>   <dttm>              <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <dbl> <dbl>
##  1 LNK     2015-12-31 00:54:00  27.0  19.9  74.5   300     5 0.00   30.3 1028.
##  2 LNK     2015-12-31 01:54:00  26.1  19.9  77.3     0     0 0.00   30.3 1028.
##  3 LNK     2015-12-31 02:54:00  27.0  19.9  74.5     0     0 0.00   30.3 1028.
##  4 LNK     2015-12-31 03:54:00  27.0  21.0  78     280     3 0.00   30.3 1028.
##  5 LNK     2015-12-31 04:54:00  27.0  19.9  74.5   310     5 0.00   30.3 1029.
##  6 LNK     2015-12-31 05:54:00  21.0  14    73.9    10     9 0.00   30.3 1029.
##  7 LNK     2015-12-31 06:54:00  19.0  12.0  73.7     0     0 0.00   30.3 1029 
##  8 LNK     2015-12-31 07:54:00  18.0  12.0  77.2    10     3 0.00   30.3 1029.
##  9 LNK     2015-12-31 08:54:00  14    10.0  83.9     0     0 0.00   30.3 1030.
## 10 LNK     2015-12-31 09:54:00  16.0  10.9  80.1     0     0 0.00   30.4 1030.
## # ... with 8,803 more rows, and 19 more variables: vsby <dbl>, gust <dbl>,
## #   skyc1 <chr>, skyc2 <chr>, skyc3 <chr>, skyc4 <lgl>, skyl1 <dbl>,
## #   skyl2 <dbl>, skyl3 <dbl>, skyl4 <lgl>, wxcodes <chr>,
## #   ice_accretion_1hr <chr>, ice_accretion_3hr <chr>, ice_accretion_6hr <chr>,
## #   peak_wind_gust <dbl>, peak_wind_drct <dbl>, peak_wind_time <dttm>,
## #   feel <dbl>, metar <chr>
# Extract wind speeds and direction
# The general wind format is dddssGssKT where ddd is the direction (VRB meaning variable), the main ss is the speed, and the Gss is the gust speed (optional and not always displayed)
extractWind <- function(met) {

    mtxWind <- met %>%
        pull(metar) %>%
        str_match(pattern="(\\d{3}|VRB)(\\d{2})(G\\d{2})?KT")
    cat("\n*** First 6 winds and parsing ***\n")
    print(head(mtxWind))

    cat("\n*** Table of WIND DIRECTION ***\n")
    print(table(mtxWind[, 2], useNA="ifany"))
    cat("\n*** Table of WIND SPEED ***\n")
    print(table(mtxWind[, 3], useNA="ifany"))
    cat("\n*** Table of WIND GUST ***\n")
    print(table(mtxWind[, 4], useNA="ifany"))

    # Verify that winds not captured are in fact missing from the METAR
    cat("\n *** WIND DATA WAS NOT CAPTURED FROM: *** \n")
    print(met[which(is.na(mtxWind[, 2])), "metar"])
    cat("\n")

    met %>%
        mutate(dirW=mtxWind[, 2], 
               spdW=as.numeric(mtxWind[, 3]), 
               gustW=as.numeric(str_replace(mtxWind[, 4], "G", ""))
               )
}
windMETAR <- extractWind(funcMETAR)
## 
## *** First 6 winds and parsing ***
##      [,1]      [,2]  [,3] [,4]
## [1,] "30005KT" "300" "05" NA  
## [2,] "00000KT" "000" "00" NA  
## [3,] "00000KT" "000" "00" NA  
## [4,] "28003KT" "280" "03" NA  
## [5,] "31005KT" "310" "05" NA  
## [6,] "01009KT" "010" "09" NA  
## 
## *** Table of WIND DIRECTION ***
## 
##  000  010  020  030  040  050  060  070  080  090  100  110  120  130  140  150 
##  875  269  199  146  135  121  108   95   88   65  102  158  169  245  241  339 
##  160  170  180  190  200  210  220  230  240  250  260  270  280  290  300  310 
##  463  565  517  413  284  179  142   96   73   80  105   89  121  141  147  225 
##  320  330  340  350  360  VRB <NA> 
##  234  303  352  413  383  114   19 
## 
## *** Table of WIND SPEED ***
## 
##   00   03   04   05   06   07   08   09   10   11   12   13   14   15   16   17 
##  875  615  704  664  696  651  636  599  536  451  439  371  315  276  235  173 
##   18   19   20   21   22   23   24   25   26   27   28   29   30   31   32   34 
##  156  108   69   62   45   33   23   13   14    9   10    6    6    2    1    1 
## <NA> 
##   19 
## 
## *** Table of WIND GUST ***
## 
##  G14  G15  G16  G17  G18  G19  G20  G21  G22  G23  G24  G25  G26  G27  G28  G29 
##    6   11   11   16   30   59   69   87   83  107  101   83   62   61   61   37 
##  G30  G31  G32  G33  G34  G35  G36  G37  G38  G39  G40  G41  G42  G43  G45 <NA> 
##   28   24   18   13   12   14    6    6   10   11    2    3    1    2    2 7777 
## 
##  *** WIND DATA WAS NOT CAPTURED FROM: *** 
## # A tibble: 19 x 1
##    metar                                                                        
##    <chr>                                                                        
##  1 KLNK 291354Z 10SM CLR M01/M03 A2976 RMK AO2 SLP088 T10061028                 
##  2 KLNK 012154Z 10SM CLR 01/M08 A3018 RMK AO2 SLP234 T00061083                  
##  3 KLNK 201754Z 10SM CLR 06/M07 A3041 RMK AO2 SLP308 T00611067 10061 21039 58013
##  4 KLNK 221754Z 10SM CLR 16/03 A2955 RMK AO2 SLP000 T01610033 10161 20106 58006~
##  5 KLNK 221854Z 10SM CLR 17/03 A2954 RMK AO2 SLP000 T01670033 $                 
##  6 KLNK 050254Z 10SM CLR 14/03 A3004 RMK AO2 SLP169 T01390033 53007             
##  7 KLNK 181754Z 10SM OVC075 21/04 A3023 RMK AO2 SLP234 T02110039 10217 20072 58~
##  8 KLNK 011454Z 21/10 A3008 RMK AO2 SLP178 T02110100 51011 $                    
##  9 KLNK 152254Z 10SM CLR 38/11 A2978 RMK AO2 SLP070 T03780106                   
## 10 KLNK 181954Z 10SM SCT045 31/20 A3017 RMK AO2 SLP205 T03060200                
## 11 KLNK 261654Z 10SM CLR 29/18 A3014 RMK AO2 SLP192 T02890183 $                 
## 12 KLNK 261754Z 10SM CLR 31/18 A3012 RMK AO2 SLP188 T03060178 10306 20222       
## 13 KLNK 271754Z 10SM CLR 33/18 A3015 RMK AO2 SLP198 T03330183 10333 20222 55001 
## 14 KLNK 251754Z 10SM CLR 29/17 A3013 RMK AO2 SLP192 T02890167 10289 20194 58010 
## 15 KLNK 261854Z 10SM CLR 30/18 A3006 RMK AO2 SLP166 T03000183                   
## 16 KLNK 211854Z 10SM CLR 27/10 A3007 RMK AO2 SLP174 T02670100                   
## 17 KLNK 201454Z 10SM CLR 24/21 A3008 RMK AO2 SLP178 T02390206 56004 $           
## 18 KLNK 051854Z 10SM CLR 22/07 A3027 RMK AO2 SLP244 T02170067                   
## 19 KLNK 170754Z AUTO 10SM CLR 08/05 A2950 RMK AO2 SLP984 T00830050
windMETAR
## # A tibble: 8,813 x 32
##    station valid                tmpf  dwpf  relh  drct  sknt p01i   alti  mslp
##    <chr>   <dttm>              <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <dbl> <dbl>
##  1 LNK     2015-12-31 00:54:00  27.0  19.9  74.5   300     5 0.00   30.3 1028.
##  2 LNK     2015-12-31 01:54:00  26.1  19.9  77.3     0     0 0.00   30.3 1028.
##  3 LNK     2015-12-31 02:54:00  27.0  19.9  74.5     0     0 0.00   30.3 1028.
##  4 LNK     2015-12-31 03:54:00  27.0  21.0  78     280     3 0.00   30.3 1028.
##  5 LNK     2015-12-31 04:54:00  27.0  19.9  74.5   310     5 0.00   30.3 1029.
##  6 LNK     2015-12-31 05:54:00  21.0  14    73.9    10     9 0.00   30.3 1029.
##  7 LNK     2015-12-31 06:54:00  19.0  12.0  73.7     0     0 0.00   30.3 1029 
##  8 LNK     2015-12-31 07:54:00  18.0  12.0  77.2    10     3 0.00   30.3 1029.
##  9 LNK     2015-12-31 08:54:00  14    10.0  83.9     0     0 0.00   30.3 1030.
## 10 LNK     2015-12-31 09:54:00  16.0  10.9  80.1     0     0 0.00   30.4 1030.
## # ... with 8,803 more rows, and 22 more variables: vsby <dbl>, gust <dbl>,
## #   skyc1 <chr>, skyc2 <chr>, skyc3 <chr>, skyc4 <lgl>, skyl1 <dbl>,
## #   skyl2 <dbl>, skyl3 <dbl>, skyl4 <lgl>, wxcodes <chr>,
## #   ice_accretion_1hr <chr>, ice_accretion_3hr <chr>, ice_accretion_6hr <chr>,
## #   peak_wind_gust <dbl>, peak_wind_drct <dbl>, peak_wind_time <dttm>,
## #   feel <dbl>, metar <chr>, dirW <chr>, spdW <dbl>, gustW <dbl>
# Generate basic wind plots
basicWindPlots <- function(met, desc="Lincoln, NE", gran="KLNK METAR (2016)") {

    # Plot for the wind direction
    wDir <- met %>%
        ggplot(aes(x=dirW)) + 
        geom_bar() + 
        labs(title=paste0(desc, " Wind Direction"), subtitle=gran, 
             y="# Hourly Observations", x="Wind Direction"
             )
    print(wDir)

    # Plot for the minimum, average, and maximum wind speed by wind direction
    # Wind direction 000 is reserved for 0 KT wind, while VRB is reserved for 3-6 KT variable winds
    wSpeedByDir <- met %>%
        filter(!is.na(dirW)) %>%
        group_by(dirW) %>%
        summarize(minWind=min(spdW), meanWind=mean(spdW), maxWind=max(spdW)) %>%
        ggplot(aes(x=dirW)) + 
        geom_point(aes(y=meanWind), color="red", size=2) + 
        geom_errorbar(aes(ymin=minWind, ymax=maxWind)) + 
        labs(title=paste0(desc, " Wind Speed (Max, Mean, Min) By Wind Direction"), subtitle=gran, 
             y="Wind Speed [KT]", x="Wind Direction"
             )
    print(wSpeedByDir)

    # Plot for the wind speed
    pctZero <- sum(met$spdW==0, na.rm=TRUE) / length(met$spdW)
    wSpeed <- met %>%
        ggplot(aes(x=spdW)) + 
        geom_bar(aes(y=..count../sum(..count..))) + 
        labs(title=paste0(round(100*pctZero), "% of wind speeds in ", desc, " measure 0 Knots"), 
             subtitle=gran, 
             y="% Hourly Observations", x="Wind Speed {KT]"
             )
    print(wSpeed)

    wPolarDirSpeed <- met %>% 
        filter(!is.na(dirW), dirW != "VRB", dirW != "000") %>%
        mutate(dirW=as.numeric(dirW)) %>%
        group_by(dirW, spdW) %>%
        summarize(n=n()) %>%
        ggplot(aes(x=spdW, y=dirW)) + 
        geom_point(alpha=0.1, aes(size=n)) + 
        coord_polar(theta="y") + 
        labs(title=paste0(desc, " Direction vs. Wind Speed"), subtitle=gran, x="Wind Speed [KT]") + 
        scale_y_continuous(limits=c(0, 360), breaks=c(0, 90, 180, 270, 360)) + 
        scale_x_continuous(limits=c(0, 30), breaks=c(0, 5, 10, 15, 20, 25, 30)) + 
        geom_point(aes(x=0, y=0), color="red", size=2)
    print(wPolarDirSpeed)
}
basicWindPlots(windMETAR)

## Warning: Removed 19 rows containing non-finite values (stat_count).

## Warning: Removed 4 rows containing missing values (geom_point).

Example #18: Functional Form for Extracting Key Elements from METAR

METAR parsing can also be converted to a functional form. This will need to be modified to be more general, since the codes used for a few things like clouds can vary from station to station.

Example code includes:

# Code for the initial METAR parsing
initialParseMETAR <- function(met, val, labs) {
    
    # Pull the METAR data
    metAll <- met %>%
        pull(metar)
    
    # Find the number of matching elements
    cat("\n*** Tentative Summary of Element Parsing *** \n")
    str_detect(metAll, pattern=val) %>% 
        table() %>%
        print()

    # The strings that do not match have errors in the raw data (typically, missing wind speed)
    cat("\n*** Data Not Matched *** \n")
    print(metAll[!str_detect(metAll, pattern=val)])

    # A matrix of string matches can be obtained
    mtxParse <- str_match(metAll, pattern=val)
    cat("\n*** Parsing matrix summary *** \n")
    print(dim(mtxParse))
    print(head(mtxParse))

    # Create a data frame
    dfParse <- data.frame(mtxParse, stringsAsFactors=FALSE) %>%
        mutate(dtime=met$valid, origMETAR=met$metar)
    names(dfParse) <- c(labs, "dtime", "origMETAR")
    dfParse <- tibble::as_tibble(dfParse)
    cat("\n*** Summary of the parsed data *** \n")
    glimpse(dfParse)
    
    dfParse
}


# Create a search string for METAR
valMet <- "54Z.*?(VRB|\\d{3})(\\d{2})(G\\d{2})?KT(.*?)(\\d{1,2}SM).*?\\s(M?\\d{2})/(M?\\d{2}).*?(A\\d{4}).*?RMK.*?(SLP\\d{3}).*?(T\\d{8})"

# Create the names for the search string to parse in to
labsMet <- c("METAR", "WindDir", "WindSpeed", "WindGust", "Dummy", "Visibility", 
             "TempC", "DewC", "Altimeter", "SLP", "FahrC"
             )

# Run the METAR parsing on the raw data
initMETAR <- initialParseMETAR(funcMETAR, val=valMet, labs=labsMet)
## 
## *** Tentative Summary of Element Parsing *** 
## .
## FALSE  TRUE 
##    23  8790 
## 
## *** Data Not Matched *** 
##  [1] "KLNK 291354Z 10SM CLR M01/M03 A2976 RMK AO2 SLP088 T10061028"                   
##  [2] "KLNK 012154Z 10SM CLR 01/M08 A3018 RMK AO2 SLP234 T00061083"                    
##  [3] "KLNK 201754Z 10SM CLR 06/M07 A3041 RMK AO2 SLP308 T00611067 10061 21039 58013"  
##  [4] "KLNK 221654Z 19007KT CLR 15/03 A2956 RMK AO2 SLP006 T01500033 $"                
##  [5] "KLNK 221754Z 10SM CLR 16/03 A2955 RMK AO2 SLP000 T01610033 10161 20106 58006 $" 
##  [6] "KLNK 221854Z 10SM CLR 17/03 A2954 RMK AO2 SLP000 T01670033 $"                   
##  [7] "KLNK 050254Z 10SM CLR 14/03 A3004 RMK AO2 SLP169 T01390033 53007"               
##  [8] "KLNK 181754Z 10SM OVC075 21/04 A3023 RMK AO2 SLP234 T02110039 10217 20072 58011"
##  [9] "KLNK 011454Z 21/10 A3008 RMK AO2 SLP178 T02110100 51011 $"                      
## [10] "KLNK 011554Z 32007KT CLR 22/09 A3008 RMK AO2 SLP179 T02170089 $"                
## [11] "KLNK 152254Z 10SM CLR 38/11 A2978 RMK AO2 SLP070 T03780106"                     
## [12] "KLNK 181954Z 10SM SCT045 31/20 A3017 RMK AO2 SLP205 T03060200"                  
## [13] "KLNK 261554Z 05006KT 27/18 A3013 RMK AO2 SLP190 T02670178 RVRNO $"              
## [14] "KLNK 261654Z 10SM CLR 29/18 A3014 RMK AO2 SLP192 T02890183 $"                   
## [15] "KLNK 261754Z 10SM CLR 31/18 A3012 RMK AO2 SLP188 T03060178 10306 20222"         
## [16] "KLNK 271754Z 10SM CLR 33/18 A3015 RMK AO2 SLP198 T03330183 10333 20222 55001"   
## [17] "KLNK 251754Z 10SM CLR 29/17 A3013 RMK AO2 SLP192 T02890167 10289 20194 58010"   
## [18] "KLNK 261854Z 10SM CLR 30/18 A3006 RMK AO2 SLP166 T03000183"                     
## [19] "KLNK 211854Z 10SM CLR 27/10 A3007 RMK AO2 SLP174 T02670100"                     
## [20] "KLNK 201454Z 10SM CLR 24/21 A3008 RMK AO2 SLP178 T02390206 56004 $"             
## [21] "KLNK 201554Z 15014G19KT CLR 27/22 A3007 RMK AO2 SLP174 T02670217 $"             
## [22] "KLNK 051854Z 10SM CLR 22/07 A3027 RMK AO2 SLP244 T02170067"                     
## [23] "KLNK 170754Z AUTO 10SM CLR 08/05 A2950 RMK AO2 SLP984 T00830050"                
## 
## *** Parsing matrix summary *** 
## [1] 8813   11
##      [,1]                                                                   
## [1,] "54Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T10281067"       
## [2,] "54Z 00000KT 10SM OVC027 M03/M07 A3030 RMK AO2 SLP277 T10331067"       
## [3,] "54Z 00000KT 10SM OVC026 M03/M07 A3030 RMK AO2 SLP277 T10281067"       
## [4,] "54Z 28003KT 10SM OVC027 M03/M06 A3031 RMK AO2 SLP281 T10281061"       
## [5,] "54Z 31005KT 10SM SCT021 OVC027 M03/M07 A3033 RMK AO2 SLP286 T10281067"
## [6,] "54Z AUTO 01009KT 10SM OVC027 M06/M10 A3033 RMK AO2 SLP289 T10611100"  
##      [,2]  [,3] [,4] [,5] [,6]   [,7]  [,8]  [,9]    [,10]    [,11]      
## [1,] "300" "05" NA   " "  "10SM" "M03" "M07" "A3029" "SLP275" "T10281067"
## [2,] "000" "00" NA   " "  "10SM" "M03" "M07" "A3030" "SLP277" "T10331067"
## [3,] "000" "00" NA   " "  "10SM" "M03" "M07" "A3030" "SLP277" "T10281067"
## [4,] "280" "03" NA   " "  "10SM" "M03" "M06" "A3031" "SLP281" "T10281061"
## [5,] "310" "05" NA   " "  "10SM" "M03" "M07" "A3033" "SLP286" "T10281067"
## [6,] "010" "09" NA   " "  "10SM" "M06" "M10" "A3033" "SLP289" "T10611100"
## 
## *** Summary of the parsed data *** 
## Observations: 8,813
## Variables: 13
## $ METAR      <chr> "54Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T1...
## $ WindDir    <chr> "300", "000", "000", "280", "310", "010", "000", "010", ...
## $ WindSpeed  <chr> "05", "00", "00", "03", "05", "09", "00", "03", "00", "0...
## $ WindGust   <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, ...
## $ Dummy      <chr> " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", "...
## $ Visibility <chr> "10SM", "10SM", "10SM", "10SM", "10SM", "10SM", "10SM", ...
## $ TempC      <chr> "M03", "M03", "M03", "M03", "M03", "M06", "M07", "M08", ...
## $ DewC       <chr> "M07", "M07", "M07", "M06", "M07", "M10", "M11", "M11", ...
## $ Altimeter  <chr> "A3029", "A3030", "A3030", "A3031", "A3033", "A3033", "A...
## $ SLP        <chr> "SLP275", "SLP277", "SLP277", "SLP281", "SLP286", "SLP28...
## $ FahrC      <chr> "T10281067", "T10331067", "T10281067", "T10281061", "T10...
## $ dtime      <dttm> 2015-12-31 00:54:00, 2015-12-31 01:54:00, 2015-12-31 02...
## $ origMETAR  <chr> "KLNK 310054Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 ...
initMETAR
## # A tibble: 8,813 x 13
##    METAR WindDir WindSpeed WindGust Dummy Visibility TempC DewC  Altimeter SLP  
##    <chr> <chr>   <chr>     <chr>    <chr> <chr>      <chr> <chr> <chr>     <chr>
##  1 54Z ~ 300     05        <NA>     " "   10SM       M03   M07   A3029     SLP2~
##  2 54Z ~ 000     00        <NA>     " "   10SM       M03   M07   A3030     SLP2~
##  3 54Z ~ 000     00        <NA>     " "   10SM       M03   M07   A3030     SLP2~
##  4 54Z ~ 280     03        <NA>     " "   10SM       M03   M06   A3031     SLP2~
##  5 54Z ~ 310     05        <NA>     " "   10SM       M03   M07   A3033     SLP2~
##  6 54Z ~ 010     09        <NA>     " "   10SM       M06   M10   A3033     SLP2~
##  7 54Z ~ 000     00        <NA>     " "   10SM       M07   M11   A3034     SLP2~
##  8 54Z ~ 010     03        <NA>     " "   10SM       M08   M11   A3034     SLP2~
##  9 54Z ~ 000     00        <NA>     " "   10SM       M10   M12   A3034     SLP2~
## 10 54Z ~ 000     00        <NA>     " "   10SM       M09   M12   A3035     SLP2~
## # ... with 8,803 more rows, and 3 more variables: FahrC <chr>, dtime <dttm>,
## #   origMETAR <chr>
# Helper function for generating plots by key variables
plotcountsByMetric <- function(df, mets) {
    
    # Plot of counts by key metric
    for (x in mets) {
        p <- df %>%
            group_by_at(vars(all_of(x))) %>%
            summarize(n=n()) %>%
            ggplot(aes_string(x=x, y="n")) + 
            geom_col() + 
            labs(title=x, y="Count")
        print(p)
    }
}


# Code for the conversion of METAR to meaningful numeric
# Should make this much more general later
convertMETAR <- function(met, metrics, seed=2003211416) {
    
    # Convert to numeric where appropriate
    dfParse <- met %>%
        mutate(WindSpeed = as.integer(WindSpeed), 
               WindGust = as.numeric(WindGust), 
               Visibility = as.numeric(str_replace(Visibility, "SM", "")),
               TempC = as.integer(str_replace(TempC, "M", "-")), 
               DewC = as.integer(str_replace(DewC, "M", "-")), 
               Altimeter = as.integer(str_replace(Altimeter, "A", "")), 
               SLP = as.integer(str_replace(SLP, "SLP", "")), 
               TempF = 32 + 1.8 * as.integer(str_replace(str_sub(FahrC, 2, 5), pattern="^1", "-"))/10, 
               DewF = 32 + 1.8 * as.integer(str_replace(str_sub(FahrC, 6, 9), pattern="^1", "-"))/10
               )

    # Investigate the data
    cat("\n *** Parsed data structure, head, tail, and random sample *** \n")
    str(dfParse)
    print(head(dfParse))
    print(tail(dfParse))
    set.seed(seed)
    dfParse %>% 
        sample_n(20) %>%
        print()

    # Check for NA values
    cat("\n *** Number of NA values *** \n")
    print(colSums(is.na(dfParse)))

    # Plot of counts by key metric
    plotcountsByMetric(dfParse, mets=metrics)
    
    # Return the parsed dataset
    dfParse
}

keyMetric <- c("WindDir", "WindSpeed", "WindGust", "Visibility", "TempC", 
               "DewC", "Altimeter", "SLP", "TempF", "DewF"
               )
convMETAR <- convertMETAR(initMETAR, metrics=keyMetric)
## Warning: NAs introduced by coercion
## 
##  *** Parsed data structure, head, tail, and random sample *** 
## Classes 'tbl_df', 'tbl' and 'data.frame':    8813 obs. of  15 variables:
##  $ METAR     : chr  "54Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T10281067" "54Z 00000KT 10SM OVC027 M03/M07 A3030 RMK AO2 SLP277 T10331067" "54Z 00000KT 10SM OVC026 M03/M07 A3030 RMK AO2 SLP277 T10281067" "54Z 28003KT 10SM OVC027 M03/M06 A3031 RMK AO2 SLP281 T10281061" ...
##  $ WindDir   : chr  "300" "000" "000" "280" ...
##  $ WindSpeed : int  5 0 0 3 5 9 0 3 0 0 ...
##  $ WindGust  : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ Dummy     : chr  " " " " " " " " ...
##  $ Visibility: num  10 10 10 10 10 10 10 10 10 10 ...
##  $ TempC     : int  -3 -3 -3 -3 -3 -6 -7 -8 -10 -9 ...
##  $ DewC      : int  -7 -7 -7 -6 -7 -10 -11 -11 -12 -12 ...
##  $ Altimeter : int  3029 3030 3030 3031 3033 3033 3034 3034 3034 3035 ...
##  $ SLP       : int  275 277 277 281 286 289 290 291 295 295 ...
##  $ FahrC     : chr  "T10281067" "T10331067" "T10281067" "T10281061" ...
##  $ dtime     : POSIXct, format: "2015-12-31 00:54:00" "2015-12-31 01:54:00" ...
##  $ origMETAR : chr  "KLNK 310054Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T10281067" "KLNK 310154Z 00000KT 10SM OVC027 M03/M07 A3030 RMK AO2 SLP277 T10331067" "KLNK 310254Z 00000KT 10SM OVC026 M03/M07 A3030 RMK AO2 SLP277 T10281067 51008" "KLNK 310354Z 28003KT 10SM OVC027 M03/M06 A3031 RMK AO2 SLP281 T10281061" ...
##  $ TempF     : num  27 26.1 27 27 27 ...
##  $ DewF      : num  19.9 19.9 19.9 21 19.9 ...
## # A tibble: 6 x 15
##   METAR WindDir WindSpeed WindGust Dummy Visibility TempC  DewC Altimeter   SLP
##   <chr> <chr>       <int>    <dbl> <chr>      <dbl> <int> <int>     <int> <int>
## 1 54Z ~ 300             5       NA " "           10    -3    -7      3029   275
## 2 54Z ~ 000             0       NA " "           10    -3    -7      3030   277
## 3 54Z ~ 000             0       NA " "           10    -3    -7      3030   277
## 4 54Z ~ 280             3       NA " "           10    -3    -6      3031   281
## 5 54Z ~ 310             5       NA " "           10    -3    -7      3033   286
## 6 54Z ~ 010             9       NA " "           10    -6   -10      3033   289
## # ... with 5 more variables: FahrC <chr>, dtime <dttm>, origMETAR <chr>,
## #   TempF <dbl>, DewF <dbl>
## # A tibble: 6 x 15
##   METAR WindDir WindSpeed WindGust Dummy Visibility TempC  DewC Altimeter   SLP
##   <chr> <chr>       <int>    <dbl> <chr>      <dbl> <int> <int>     <int> <int>
## 1 54Z ~ 160             6       NA " "           10     2    -4      2993   147
## 2 54Z ~ VRB             4       NA " "           10     4    -4      2990   139
## 3 54Z ~ 130             7       NA " "           10     4    -7      2989   134
## 4 54Z ~ 110             7       NA " "           10     4    -7      2988   130
## 5 54Z ~ 100            10       NA " "           10     3    -5      2986   125
## 6 54Z ~ 100            10       NA " "           10     3    -6      2986   123
## # ... with 5 more variables: FahrC <chr>, dtime <dttm>, origMETAR <chr>,
## #   TempF <dbl>, DewF <dbl>
## # A tibble: 20 x 15
##    METAR WindDir WindSpeed WindGust Dummy Visibility TempC  DewC Altimeter   SLP
##    <chr> <chr>       <int>    <dbl> <chr>      <dbl> <int> <int>     <int> <int>
##  1 54Z ~ 360             7       NA " "           10    21    15      2995   130
##  2 54Z ~ 210            13       NA " "           10    31     4      2988   108
##  3 54Z ~ 340            11       NA " "           10    -7   -13      3025   259
##  4 54Z ~ 240             9       NA " "           10    13     8      2961    18
##  5 54Z ~ 310             6       NA " "           10    -7   -19      3044   329
##  6 54Z ~ 040             7       NA " "           10    26    14      3010   179
##  7 54Z ~ 170            17       NA " "           10    31    22      2983    90
##  8 54Z ~ 160             7       NA " "           10    -1    -4      3002   182
##  9 54Z ~ 310             9       NA " "           10    13     9      3021   226
## 10 54Z ~ 110            10       NA " "           10    16    16      2990   116
## 11 54Z ~ 140             5       NA " "           10    19    18      3013   199
## 12 54Z ~ 110             6       NA " "           10    20     4      3012   197
## 13 54Z ~ 220             4       NA " "           10     3    -4      3042   311
## 14 54Z ~ 350             3       NA " "            8     9     8      3011   194
## 15 54Z ~ 080             8       NA " "           10   -12   -18      3046   340
## 16 54Z ~ 320             7       NA " "           10    16    11      2998   144
## 17 54Z ~ 330             6       NA " "           10     9     5      3016   214
## 18 54Z ~ 340            13       NA " "            3     0    -1      2979    99
## 19 54Z ~ 190             5       NA " "           10     3    -4      3024   251
## 20 54Z ~ 320            18       NA " "           10    24    12      2999   147
## # ... with 5 more variables: FahrC <chr>, dtime <dttm>, origMETAR <chr>,
## #   TempF <dbl>, DewF <dbl>
## 
##  *** Number of NA values *** 
##      METAR    WindDir  WindSpeed   WindGust      Dummy Visibility      TempC 
##         23         23         23       8813         23         23         23 
##       DewC  Altimeter        SLP      FahrC      dtime  origMETAR      TempF 
##         23         23         23         23          0          0         23 
##       DewF 
##         23

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf
## Warning in min(diff(sort(x))): no non-missing arguments to min; returning Inf
## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

# There are three obvious issues
# Visibility is not correctly picked up when there is a / such as 1/2 SM
# Wind gusts are never picked up
# Sea Level Pressure is missing a digit

# Address the visibility issues
getVisibility <- function(curMet, origMet) {
    
    # Get the original METAR data
    metAll <- origMet %>%
        pull(metar)
    
    # Correct for visibility
    # Areas that have \\d \\d/\\dSM
    sm1 <- which(str_detect(metAll, pattern=" \\d/\\dSM"))
    sm2 <- which(str_detect(metAll, pattern=" \\d \\d/\\dSM"))

    valSM1 <- str_match(metAll, pattern="\\d/\\dSM")[sm1]
    valSM1 <- str_replace(valSM1, "SM", "")
    valSM1 <- as.integer(str_sub(valSM1, 1, 1)) / as.integer(str_sub(valSM1, 3, 3))

    valSM2 <- str_match(metAll, pattern=" \\d \\d/\\dSM")[sm2]
    valSM2 <- as.integer(str_sub(valSM2, 2, 2))

    curMet[sm1, "Visibility"] <- valSM1
    curMet[sm2, "Visibility"] <- curMet[sm2, "Visibility"] + valSM2

    curMet %>% 
        count(Visibility) %>%
        print()
    
    curMet
}
parseMETAR <- getVisibility(convMETAR, origMet=funcMETAR)
## # A tibble: 18 x 2
##    Visibility     n
##         <dbl> <int>
##  1       0.25    20
##  2       0.5     16
##  3       0.75    15
##  4       1       19
##  5       1.25    13
##  6       1.5     17
##  7       1.75    21
##  8       2       50
##  9       2.5     38
## 10       3       70
## 11       4      108
## 12       5      108
## 13       6      146
## 14       7      189
## 15       8      221
## 16       9      290
## 17      10     7449
## 18      NA       23
# Correct for wind gusts
getWindGusts <- function(curMet, origMet) {

    metAll <- origMet %>%
        pull(metar)
    
    gustCheck <- which(str_detect(metAll, pattern="\\d{5}G\\d{2}KT"))
    valGust <- str_match(metAll, pattern="\\d{5}G\\d{2}KT")[gustCheck]
    valGust <- as.integer(str_sub(valGust, 7, 8))

    curMet[gustCheck, "WindGust"] <- valGust

    curMet %>% 
        count(WindGust) %>% 
        as.data.frame %>%
        print()
    
    curMet
}
parseMETAR <- getWindGusts(parseMETAR, origMet=funcMETAR)
##    WindGust    n
## 1        14    4
## 2        15   11
## 3        16   11
## 4        17   16
## 5        18   29
## 6        19   59
## 7        20   69
## 8        21   87
## 9        22   83
## 10       23  107
## 11       24  101
## 12       25   83
## 13       26   62
## 14       27   61
## 15       28   61
## 16       29   37
## 17       30   28
## 18       31   24
## 19       32   18
## 20       33   13
## 21       34   12
## 22       35   14
## 23       36    6
## 24       37    6
## 25       38   10
## 26       39   11
## 27       40    2
## 28       41    3
## 29       42    1
## 30       43    2
## 31       45    2
## 32       NA 7780
# Correct for SLP
fixSLP <- function(curMet) {

    dfParse <- curMet %>%
        mutate(modSLP=ifelse(curMet$SLP < 500, 1000 + curMet$SLP/10, 900 + curMet$SLP/10))

    p <- dfParse %>%
        group_by(SLP, modSLP) %>%
        summarize(n=n()) %>%
        ggplot(aes(x=SLP, y=modSLP, size=n)) + 
        geom_point(alpha=0.3)
    print(p)
    
    dfParse
}
parseMETAR <- fixSLP(parseMETAR)
## Warning: Removed 1 rows containing missing values (geom_point).

# Check updated plots
plotcountsByMetric(parseMETAR, mets=c("WindGust", "Visibility", "modSLP"))
## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

Example #19: Functional Form For Relationships Between METAR Variables

The relationships between METAR variables can also be converted to functional form.

Example code includes:

# Function to calculate, display, and plot variable correlations
corMETAR <- function(met, numVars, subT="") {

    # Keep only complete cases and report on data kept
    dfUse <- met %>%
        select_at(vars(all_of(numVars))) %>%
        filter(complete.cases(.))
    
    nU <- nrow(dfUse)
    nM <- nrow(met)
    myPct <- round(100*nU/nM, 1)
    cat("\n *** Correlations use ", nU, " complete cases (", myPct, "% of ", nM, " total) ***\n", sep="")
    
    # Create the correlation matrix
    mtxCorr <- dfUse %>%
        cor()

    # Print the correlations
    mtxCorr %>%
        round(2) %>%
        print()

    # Display a heat map
    corrplot::corrplot(mtxCorr, method="color", title=paste0("Hourly Weather Correlations\n", subT))
}

# Define key numeric variables
coreNum <- c("TempC", "TempF", "DewC", "DewF", "Altimeter", "modSLP", "WindSpeed", "Visibility")

# Run the correlations function
corMETAR(parseMETAR, numVars=coreNum)
## 
##  *** Correlations use 8790 complete cases (99.7% of 8813 total) ***
##            TempC TempF  DewC  DewF Altimeter modSLP WindSpeed Visibility
## TempC       1.00  1.00  0.91  0.91     -0.38  -0.48      0.13       0.19
## TempF       1.00  1.00  0.91  0.91     -0.38  -0.48      0.13       0.19
## DewC        0.91  0.91  1.00  1.00     -0.38  -0.48     -0.01       0.07
## DewF        0.91  0.91  1.00  1.00     -0.38  -0.48     -0.01       0.07
## Altimeter  -0.38 -0.38 -0.38 -0.38      1.00   0.99     -0.26       0.07
## modSLP     -0.48 -0.48 -0.48 -0.48      0.99   1.00     -0.25       0.04
## WindSpeed   0.13  0.13 -0.01 -0.01     -0.26  -0.25      1.00      -0.01
## Visibility  0.19  0.19  0.07  0.07      0.07   0.04     -0.01       1.00

# Create a function for plotting two variables against each other
plotNumCor <- function(met, var1, var2, title=NULL, subT="") {
    if (is.null(title)) 
        { title <- paste0("Hourly Correlations of ", var1, " and ", var2) }
    p <- met %>%
        group_by_at(vars(all_of(c(var1, var2)))) %>%
        summarize(n=n()) %>%
        ggplot(aes_string(x=var1, y=var2)) + 
        geom_point(alpha=0.5, aes_string(size="n")) + 
        geom_smooth(method="lm", aes_string(weight="n")) + 
        labs(x=var1, y=var2, title=title, subtitle=subT)
    print(p)
}

var1List <- c("TempC", "DewC", "Altimeter", "TempF", "TempF",     "TempF",  "Altimeter")
var2List <- c("TempF", "DewF", "modSLP",    "DewF",  "Altimeter", "modSLP", "WindSpeed")

for (n in 1:length(var1List)) {
    plotNumCor(parseMETAR, var1List[n], var2List[n])
}
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

# Function for linear regressions on METAR data
lmMETAR <- function(met, y, x, yName, subT="Lincoln, NE (2016) Hourly METAR") {
    
    # Convert to formula
    myChar <- paste0(y, " ~ ", x)
    cat("\n *** Regression call is:", myChar, "***\n")
    
    # Run regression
    regr <- lm(formula(myChar), data=met)
    
    # Summarize regression
    print(summary(regr))
    
    # Predict the new values
    pred <- predict(regr, newdata=met)
    
    # Plot the predictions
    p <- met %>%
        select_at(vars(all_of(y))) %>%
        mutate(pred=pred) %>%
        group_by_at(vars(all_of(c(y, "pred")))) %>%
        summarize(n=n()) %>%
        ggplot(aes_string(x=y, y="pred")) + 
        geom_point(aes(size=n), alpha=0.25) + 
        geom_smooth(aes(weight=n), method="lm") + 
        labs(title=paste0("Predicted vs. Actual ", yName, " - ", x, " as Predictor"), 
             subtitle=subT, x=paste0("Actual ", yName), y=paste0("Predicted ", yName)
             )
    print(p)
}

lmMETAR(parseMETAR, "modSLP", "Altimeter", yName="Sea Level Pressure")
## 
##  *** Regression call is: modSLP ~ Altimeter ***
## 
## Call:
## lm(formula = formula(myChar), data = met)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.8890 -0.8172 -0.1578  0.7610  2.7890 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -6.233e+01  1.406e+00  -44.34   <2e-16 ***
## Altimeter    3.594e-01  4.683e-04  767.38   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9691 on 8788 degrees of freedom
##   (23 observations deleted due to missingness)
## Multiple R-squared:  0.9853, Adjusted R-squared:  0.9853 
## F-statistic: 5.889e+05 on 1 and 8788 DF,  p-value: < 2.2e-16
## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

lmMETAR(parseMETAR, "modSLP", "Altimeter + TempF", yName="Sea Level Pressure")
## 
##  *** Regression call is: modSLP ~ Altimeter + TempF ***
## 
## Call:
## lm(formula = formula(myChar), data = met)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.24130 -0.26737  0.00686  0.25214  1.23326 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -1.002e+01  5.615e-01  -17.84   <2e-16 ***
## Altimeter    3.428e-01  1.857e-04 1845.63   <2e-16 ***
## TempF       -4.559e-02  1.921e-04 -237.31   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3561 on 8787 degrees of freedom
##   (23 observations deleted due to missingness)
## Multiple R-squared:  0.998,  Adjusted R-squared:  0.998 
## F-statistic: 2.209e+06 on 2 and 8787 DF,  p-value: < 2.2e-16
## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

Example #20: Functional Form for Extracting Cloud Data from METAR

Cloud data can also be extracted using the functional form.

Example code includes:

extractClouds <- function(met, metVar, subT="Lincoln, NE (2016) Hourly METAR") {

    metAll <- met %>%
        pull(metVar)
    
    # Extract the CLR records
    mtxCLR <- str_extract_all(metAll, pattern=" CLR ", simplify=TRUE)
    if (dim(mtxCLR)[[2]] != 1) { stop("Extracted 2+ CLR from some METAR; investigate") }
    isCLR <- ifelse(mtxCLR[, 1] == "", 0, 1)

    # Extract the VV records
    mtxVV <- str_extract_all(metAll, pattern="VV(\\d{3})", simplify=TRUE)
    if (dim(mtxVV)[[2]] > 1) { stop("Extracted 2+ VV from some METAR; investigate") }
    if ((dim(mtxVV))[[2]] == 0) {
        cat("\nNo Records with a cloud type of vertical visibility (VV)\n")
        isVV <- rep(0, times=length(isCLR))
        htVV <- rep(NA, times=length(isCLR))
    } else {
        isVV <- ifelse(mtxVV[, 1] == "", 0, 1)
        htVV <- ifelse(mtxVV[, 1] == "", NA, as.integer(str_replace(mtxVV[, 1], "VV", ""))*100)
    }

    # Extract the FEW records
    mtxFEW <- str_extract_all(metAll, pattern="FEW(\\d{3})", simplify=TRUE)
    numFEW <- apply(mtxFEW, 1, FUN=function(x) { sum((x!=""))} )

    # Extract the SCT records
    mtxSCT <- str_extract_all(metAll, pattern="SCT(\\d{3})", simplify=TRUE)
    numSCT <- apply(mtxSCT, 1, FUN=function(x) { sum((x!=""))} )

    # Extract the BKN records
    mtxBKN <- str_extract_all(metAll, pattern="BKN(\\d{3})", simplify=TRUE)
    numBKN <- apply(mtxBKN, 1, FUN=function(x) { sum((x!=""))} )

    # Extract the OVC records
    mtxOVC <- str_extract_all(metAll, pattern="OVC(\\d{3})", simplify=TRUE)
    numOVC <- apply(mtxOVC, 1, FUN=function(x) { sum((x!=""))} )

    # Summarize as a data frame
    tblClouds <- tibble::tibble(isCLR=isCLR, isVV=isVV, htVV=htVV, numFEW=numFEW, 
                                numSCT=numSCT, numBKN=numBKN, numOVC=numOVC
                                )

    # Get the counts
    cat("\n*** Counts by number of layers of each cloud type ***\n")
    tblClouds %>% 
        count(isCLR, isVV, numFEW, numSCT, numBKN, numOVC) %>%
        as.data.frame() %>%
        print()

    # Investigate the problem data
    cat("\n*** METAR records where no clouds were extracted ***\n")
    metAll[rowSums(tblClouds, na.rm=TRUE)==0] %>%
        print()
    
    # Plot the counts of most obscuration
    p <- tblClouds %>%
        filter(rowSums(., na.rm=TRUE) > 0) %>%
        mutate(wType=factor(case_when(isCLR==1 ~ "CLR", isVV==1 ~ "VV", numOVC > 0 ~ "OVC", 
                                      numBKN > 0 ~ "BKN", numSCT > 0 ~ "SCT", numFEW > 0 ~ "FEW", 
                                      TRUE ~ "Error"
                                      ), levels=c("VV", "OVC", "BKN", "SCT", "FEW", "CLR", "Error")
                            )
               ) %>%
        ggplot(aes(x=wType, y=..count../sum(..count..))) + 
        geom_bar() + 
        labs(title="Highest Obscuration by Cloud", subtitle=subT, 
             x="Cloud Type", y="Proportion of Hourly Measurements"
             )
    print(p)
    
    # Integrate the clouds data
    mtxCloud <- cbind(mtxVV, mtxOVC, mtxBKN, mtxSCT, mtxFEW, mtxCLR)
    cat("\n*** Dimensions for the cloud matrix ***\n")
    print(dim(mtxCloud))
    
    list(tblClouds=tblClouds, mtxCloud=mtxCloud)
}

# Run the initial cloud extraction
initClouds <- extractClouds(parseMETAR, metVar="origMETAR")
## 
## *** Counts by number of layers of each cloud type ***
##    isCLR isVV numFEW numSCT numBKN numOVC    n
## 1      0    0      0      0      0      0    6
## 2      0    0      0      0      0      1 1389
## 3      0    0      0      0      1      0  307
## 4      0    0      0      0      1      1  250
## 5      0    0      0      0      2      0   50
## 6      0    0      0      0      2      1   45
## 7      0    0      0      0      3      0    8
## 8      0    0      0      1      0      0  230
## 9      0    0      0      1      0      1   73
## 10     0    0      0      1      1      0   52
## 11     0    0      0      1      1      1   42
## 12     0    0      0      1      2      0   17
## 13     0    0      0      2      0      0   16
## 14     0    0      0      2      0      1    9
## 15     0    0      0      2      1      0    6
## 16     0    0      1      0      0      0  380
## 17     0    0      1      0      0      1   90
## 18     0    0      1      0      1      0   46
## 19     0    0      1      0      1      1   62
## 20     0    0      1      0      2      0    9
## 21     0    0      1      1      0      0   39
## 22     0    0      1      1      0      1   24
## 23     0    0      1      1      1      0   28
## 24     0    0      1      2      0      0   10
## 25     0    0      2      0      0      0   24
## 26     0    0      2      0      0      1    6
## 27     0    0      2      0      1      0    5
## 28     0    0      2      1      0      0    3
## 29     0    0      3      0      0      0    2
## 30     0    1      0      0      0      0   33
## 31     1    0      0      0      0      0 5552
## 
## *** METAR records where no clouds were extracted ***
## [1] "KLNK 301854Z 17011KT 10SM 30/18 A2993 RMK AO2 SLP124 T03000178 $"                           
## [2] "KLNK 011454Z 21/10 A3008 RMK AO2 SLP178 T02110100 51011 $"                                  
## [3] "KLNK 261554Z 05006KT 27/18 A3013 RMK AO2 SLP190 T02670178 RVRNO $"                          
## [4] "KLNK 161554Z 09009KT 10SM 26/19 A3007 RMK AO2 SLP171 T02610194 $"                           
## [5] "KLNK 211954Z 25008KT 210V270 10SM 27/10 A3005 RMK AO2 SLP170 T02720100 $"                   
## [6] "KLNK 050854Z 00000KT 1/2SM R36/3000VP6000FT FG 09/09 A2978 RMK AO2 SLP081 T00940094 51008 $"

## 
## *** Dimensions for the cloud matrix ***
## [1] 8813   11
str(initClouds)
## List of 2
##  $ tblClouds:Classes 'tbl_df', 'tbl' and 'data.frame':   8813 obs. of  7 variables:
##   ..$ isCLR : num [1:8813] 0 0 0 0 0 0 0 0 0 1 ...
##   ..$ isVV  : num [1:8813] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ htVV  : num [1:8813] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ numFEW: int [1:8813] 0 0 0 0 0 0 0 0 1 0 ...
##   ..$ numSCT: int [1:8813] 0 0 0 0 1 0 0 0 0 0 ...
##   ..$ numBKN: int [1:8813] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ numOVC: int [1:8813] 1 1 1 1 1 1 1 1 0 0 ...
##  $ mtxCloud : chr [1:8813, 1:11] "" "" "" "" ...
# Cycle through to find levels of a given type
ckClouds <- function(cloudType, mtx) {
    isKey <- which(apply(mtx, 2, FUN=function(x) {sum(str_detect(x, cloudType))}) > 0)
    as.integer(str_replace(mtx[, min(isKey)], cloudType, "")) * 100
}


# Function to create the lowest cloud levels
findLowestClouds <- function(mtxCloud, subT="Lincoln, NE (2016) Hourly METAR") {

    # Find the lowest clouds by cloud type
    lowOVC <- ckClouds("OVC", mtx=mtxCloud)
    lowVV <- ckClouds("VV", mtx=mtxCloud)
    lowBKN <- ckClouds("BKN", mtx=mtxCloud)
    lowSCT <- ckClouds("SCT", mtx=mtxCloud)
    lowFEW <- ckClouds("FEW", mtx=mtxCloud)

    # Integrate the lowest cloud type by level
    lowCloud <- tibble::tibble(lowVV, lowOVC, lowBKN, lowSCT, lowFEW)
    cat("\n*** Lowest clouds by type tibble ***\n")
    print(lowCloud)

    # Get the lowest cloud level
    minCloud <- lowCloud
    minCloud[is.na(minCloud)] <- 999999
    minCloudLevel <- apply(minCloud, 1, FUN=min)
    minCeilingLevel <- apply(minCloud[, c("lowVV", "lowOVC", "lowBKN")], 1, FUN=min)

    noCloudPct <- mean(minCloudLevel == 999999)
    noCeilingPct <- mean(minCeilingLevel == 999999)

    # Plot the minimum cloud level (where it exists)
    p <- data.frame(minCloudLevel, minCeilingLevel) %>%
        filter(minCloudLevel != 999999) %>%
        ggplot(aes(x=minCloudLevel)) + 
        geom_bar(aes(y=..count../sum(..count..))) + 
        geom_text(aes(x=2500, y=0.04, 
                      label=paste0(round(100*noCloudPct), "% of obs. have no clouds")
                      )
                  ) + 
        labs(x="Height [ft]", y="Proportion", 
             title="Minimum Cloud Height (when some clouds exist)", subtitle=subT
             )
    print(p)

    # Plot the minimum ceiling level (where it exists)
    p <- data.frame(minCloudLevel, minCeilingLevel) %>%
        filter(minCeilingLevel != 999999) %>%
        ggplot(aes(x=minCeilingLevel)) + 
        geom_bar(aes(y=..count../sum(..count..))) + 
        geom_text(aes(x=2500, y=0.04, 
                      label=paste0(round(100*noCeilingPct), "% of obs. have no ceiling")
                      )
                  ) + 
        labs(x="Height [ft]", y="Proportion", 
             title="Minimum Ceiling Height (when a ceiling exists)", subtitle=subT
             )
    print(p)
    
    list(lowCloud=lowCloud, minCeilingLevel=minCeilingLevel, minCloudLevel=minCloudLevel)
}

processedClouds <-findLowestClouds(initClouds$mtxCloud)
## 
## *** Lowest clouds by type tibble ***
## # A tibble: 8,813 x 5
##    lowVV lowOVC lowBKN lowSCT lowFEW
##    <dbl>  <dbl>  <dbl>  <dbl>  <dbl>
##  1    NA   2800     NA     NA     NA
##  2    NA   2700     NA     NA     NA
##  3    NA   2600     NA     NA     NA
##  4    NA   2700     NA     NA     NA
##  5    NA   2700     NA   2100     NA
##  6    NA   2700     NA     NA     NA
##  7    NA   2700     NA     NA     NA
##  8    NA   2700     NA     NA     NA
##  9    NA     NA     NA     NA   2600
## 10    NA     NA     NA     NA     NA
## # ... with 8,803 more rows

str(processedClouds)
## List of 3
##  $ lowCloud       :Classes 'tbl_df', 'tbl' and 'data.frame': 8813 obs. of  5 variables:
##   ..$ lowVV : num [1:8813] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ lowOVC: num [1:8813] 2800 2700 2600 2700 2700 2700 2700 2700 NA NA ...
##   ..$ lowBKN: num [1:8813] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ lowSCT: num [1:8813] NA NA NA NA 2100 NA NA NA NA NA ...
##   ..$ lowFEW: num [1:8813] NA NA NA NA NA NA NA NA 2600 NA ...
##  $ minCeilingLevel: num [1:8813] 2800 2700 2600 2700 2700 ...
##  $ minCloudLevel  : num [1:8813] 2800 2700 2600 2700 2100 ...

Example #21: Functional Form for Plotting by factor variables

The month of the year is an interesting data point for plotting against.

Example code includes:

# Function to bind the existing parsed METAR data with the cloud data
bindMETAR <- function(dfParse, tblClouds, lowCloud) {

    # Integrate the cloud data and convert month to a factor
    dfFull <- cbind(dfParse, tblClouds, lowCloud) %>%
        mutate(wType=factor(case_when(isCLR==1 ~ "CLR", isVV==1 ~ "VV", numOVC > 0 ~ "OVC", 
                                      numBKN > 0 ~ "BKN", numSCT > 0 ~ "SCT", numFEW > 0 ~ "FEW", 
                                      TRUE ~ "Error"
                                      ), levels=c("VV", "OVC", "BKN", "SCT", "FEW", "CLR", "Error")
                            ), 
               month=factor(lubridate::month(dtime), levels=1:12, labels=month.abb)
               )
    
    dfFull <- tibble::as_tibble(dfFull)
    str(dfFull)
    
    dfFull
}

fullMETAR <- bindMETAR(dfParse=parseMETAR, 
                       tblClouds=initClouds$tblClouds, 
                       lowCloud=processedClouds$lowCloud
                       )
## Classes 'tbl_df', 'tbl' and 'data.frame':    8813 obs. of  30 variables:
##  $ METAR     : chr  "54Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T10281067" "54Z 00000KT 10SM OVC027 M03/M07 A3030 RMK AO2 SLP277 T10331067" "54Z 00000KT 10SM OVC026 M03/M07 A3030 RMK AO2 SLP277 T10281067" "54Z 28003KT 10SM OVC027 M03/M06 A3031 RMK AO2 SLP281 T10281061" ...
##  $ WindDir   : chr  "300" "000" "000" "280" ...
##  $ WindSpeed : int  5 0 0 3 5 9 0 3 0 0 ...
##  $ WindGust  : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ Dummy     : chr  " " " " " " " " ...
##  $ Visibility: num  10 10 10 10 10 10 10 10 10 10 ...
##  $ TempC     : int  -3 -3 -3 -3 -3 -6 -7 -8 -10 -9 ...
##  $ DewC      : int  -7 -7 -7 -6 -7 -10 -11 -11 -12 -12 ...
##  $ Altimeter : int  3029 3030 3030 3031 3033 3033 3034 3034 3034 3035 ...
##  $ SLP       : int  275 277 277 281 286 289 290 291 295 295 ...
##  $ FahrC     : chr  "T10281067" "T10331067" "T10281067" "T10281061" ...
##  $ dtime     : POSIXct, format: "2015-12-31 00:54:00" "2015-12-31 01:54:00" ...
##  $ origMETAR : chr  "KLNK 310054Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T10281067" "KLNK 310154Z 00000KT 10SM OVC027 M03/M07 A3030 RMK AO2 SLP277 T10331067" "KLNK 310254Z 00000KT 10SM OVC026 M03/M07 A3030 RMK AO2 SLP277 T10281067 51008" "KLNK 310354Z 28003KT 10SM OVC027 M03/M06 A3031 RMK AO2 SLP281 T10281061" ...
##  $ TempF     : num  27 26.1 27 27 27 ...
##  $ DewF      : num  19.9 19.9 19.9 21 19.9 ...
##  $ modSLP    : num  1028 1028 1028 1028 1029 ...
##  $ isCLR     : num  0 0 0 0 0 0 0 0 0 1 ...
##  $ isVV      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ htVV      : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ numFEW    : int  0 0 0 0 0 0 0 0 1 0 ...
##  $ numSCT    : int  0 0 0 0 1 0 0 0 0 0 ...
##  $ numBKN    : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ numOVC    : int  1 1 1 1 1 1 1 1 0 0 ...
##  $ lowVV     : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ lowOVC    : num  2800 2700 2600 2700 2700 2700 2700 2700 NA NA ...
##  $ lowBKN    : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ lowSCT    : num  NA NA NA NA 2100 NA NA NA NA NA ...
##  $ lowFEW    : num  NA NA NA NA NA NA NA NA 2600 NA ...
##  $ wType     : Factor w/ 7 levels "VV","OVC","BKN",..: 2 2 2 2 2 2 2 2 5 6 ...
##  $ month     : Factor w/ 12 levels "Jan","Feb","Mar",..: 12 12 12 12 12 12 12 12 12 12 ...
# Updated function for plotting numeric by factor
plotFactorNumeric <- function(met, fctVar, numVar, title=NULL, subT) {
    if (is.null(title)) { title <- paste0("Hourly Weather - ", numVar, " vs. ", fctVar) }
    p <- met %>%
        filter(!is.na(get(fctVar)), !is.na(get(numVar))) %>%
        ggplot(aes_string(x=fctVar, y=numVar)) + 
        geom_boxplot(fill="lightblue") + 
        labs(title=title, subtitle=subT)
    print(p)
}

# Function for creating cloud plots
makeFactorPlots <- function(met, 
                            fctVar=c("month", "wType"), 
                            keyVar=c("WindSpeed", "Visibility", "Altimeter", "TempF", "DewF"), 
                            desc="Lincoln, NE (2016) Hourly METAR"
                            ) {

    # Run for all of the key variables against wind speed and cloud type
    for (varF in fctVar) {
        for (varK in keyVar) { 
            plotFactorNumeric(met, fctVar=varF, numVar=varK, subT=desc) 
        }
    }

    # Create stacked bars for cloud type by month
    # dfFull %>%
    #     filter(!is.na(wType), wType!="Error") %>%
    #     ggplot(aes(x=month, fill=wType)) + 
    #     geom_bar(position="fill") + 
    #     labs(title="Lincoln, NE (2016)", x="", y="Proportion of Month")
}

makeFactorPlots(fullMETAR)

Example #22: Combining Functional Forms for METAR Processing

The functions can be combined in to a single routine for reading, parsing, and running EDA on METAR data..

Example code includes:

# Function to run the full process
runAllMETAR <- function(fname, timeZ, expMin, expDays, locMET, shortMET, longMET, valMet, 
                        labsMet=c("METAR", "WindDir", "WindSpeed", "WindGust", "Dummy", "Visibility", 
                                  "TempC", "DewC", "Altimeter", "SLP", "FahrC"
                                  ), 
                        keyMetric=c("WindDir", "WindSpeed", "WindGust", "Visibility", "TempC", 
                                    "DewC", "Altimeter", "SLP", "TempF", "DewF"
                                    ), 
                        coreNum=c("TempC", "TempF", "DewC", "DewF", 
                                  "Altimeter", "modSLP", "WindSpeed", "Visibility"
                                  ), 
                        var1List=c("TempC", "DewC", "Altimeter", "TempF", "TempF", "TempF", "Altimeter"), 
                        var2List=c("TempF", "DewF", "modSLP", "DewF", "Altimeter", "modSLP", "WindSpeed")
                        ) {
    
    # Read in the METAR data
    funcMETAR <- readMETAR(fileName=fname, timeZ=timeZ, expMin=expMin, expDays=expDays)
    # funcMETAR

    # Extract wind data from METAR
    windMETAR <- extractWind(funcMETAR)
    # windMETAR

    # Run basic wind plots
    basicWindPlots(windMETAR, desc=locMET, gran=shortMET)

    # Run the METAR parsing on the raw data
    initMETAR <- initialParseMETAR(funcMETAR, val=valMet, labs=labsMet)
    # initMETAR

    # Parse and convert the METAR data
    convMETAR <- convertMETAR(initMETAR, metrics=keyMetric)
    # convMETAR

    # Fix problems with visibility, wind gusts, and SLP
    parseMETAR <- getVisibility(convMETAR, origMet=funcMETAR)
    parseMETAR <- getWindGusts(parseMETAR, origMet=funcMETAR)
    parseMETAR <- fixSLP(parseMETAR)

    # Check updated plots
    plotcountsByMetric(parseMETAR, mets=c("WindGust", "Visibility", "modSLP"))

    # Run the correlations function
    corMETAR(parseMETAR, numVars=coreNum, subT=longMET)

    # Plot correlations
    for (n in 1:length(var1List)) {
        plotNumCor(parseMETAR, var1List[n], var2List[n], subT=longMET)
    }

    # Run lm models for SLP vs Altimeter and (optionally) Temperature
    lmMETAR(parseMETAR, "modSLP", "Altimeter", yName="Sea Level Pressure", subT=longMET)
    lmMETAR(parseMETAR, "modSLP", "Altimeter + TempF", yName="Sea Level Pressure", subT=longMET)

    # Run the initial cloud extraction
    initClouds <- extractClouds(parseMETAR, metVar="origMETAR", subT=longMET)
    str(initClouds)

    # Find the lowest cloud levels and lowest ceilings
    processedClouds <-findLowestClouds(initClouds$mtxCloud, subT=longMET)
    str(processedClouds)

    # Bind the processed METAR and the cloud data
    fullMETAR <- bindMETAR(dfParse=parseMETAR, 
                           tblClouds=initClouds$tblClouds, 
                           lowCloud=processedClouds$lowCloud
                           )

    # Create box plots for key weather elements against month and cloud type
    makeFactorPlots(fullMETAR, desc=longMET)
    
    # Return all of the elements
    list(fullMETAR=fullMETAR, funcMETAR=funcMETAR, windMETAR=windMETAR, 
         initMETAR=initMETAR, convMETAR=convMETAR, parseMETAR=parseMETAR, 
         initClouds=initClouds, processedClouds=processedClouds
         )
}

Followed by caching the run of the function for Lincoln, NE:

# Set key parameters for reading and interpreting METAR
fname <- "./RInputFiles/metar_klnk_2016.txt"  # file name for raw METAR data
timeZ <- "54Z"  # Zulu time that METAR is recorded at this station
expMin <- as.POSIXct("2015-12-31 00:54:00", tz="UTC")  # Expected first time read
expDays <- 368  # Expected total days read
locMET <- "Lincoln, NE"  # Description of city or location
shortMET <- "KLNK METAR (2016)"  # Station code and timing
longMET <- "Lincoln, NE Hourly METAR (2016)"  # Description of city or location and timing

# Extraction format for METAR - paste the expected Zulu time at the front
valMet <- paste0(timeZ, ".*?(VRB|\\d{3})(\\d{2})(G\\d{2})?KT(.*?)(\\d{1,2}SM).*?\\s(M?\\d{2})/(M?\\d{2}).*?(A\\d{4}).*?RMK.*?(SLP\\d{3}).*?(T\\d{8})")

# Run the process for Lincoln, NE
klnk2016METAR <- runAllMETAR(fname=fname, timeZ=timeZ, expMin=expMin, expDays=expDays, 
                             locMET=locMET, shortMET=shortMET, longMET=longMET, valMet=valMet
                             )
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   station = col_character(),
##   valid = col_datetime(format = ""),
##   p01i = col_character(),
##   skyc1 = col_character(),
##   skyc2 = col_character(),
##   skyc3 = col_character(),
##   skyc4 = col_logical(),
##   skyl4 = col_logical(),
##   wxcodes = col_character(),
##   ice_accretion_1hr = col_character(),
##   ice_accretion_3hr = col_character(),
##   ice_accretion_6hr = col_character(),
##   peak_wind_time = col_datetime(format = ""),
##   metar = col_character()
## )
## See spec(...) for full column specifications.
## Classes 'spec_tbl_df', 'tbl_df', 'tbl' and 'data.frame': 10594 obs. of  29 variables:
##  $ station          : chr  "LNK" "LNK" "LNK" "LNK" ...
##  $ valid            : POSIXct, format: "2015-12-31 00:54:00" "2015-12-31 01:54:00" ...
##  $ tmpf             : num  27 26.1 27 27 27 ...
##  $ dwpf             : num  19.9 19.9 19.9 21 19.9 ...
##  $ relh             : num  74.5 77.3 74.5 78 74.5 ...
##  $ drct             : num  300 0 0 280 310 10 0 10 20 0 ...
##  $ sknt             : num  5 0 0 3 5 9 0 3 3 0 ...
##  $ p01i             : chr  "0.00" "0.00" "0.00" "0.00" ...
##  $ alti             : num  30.3 30.3 30.3 30.3 30.3 ...
##  $ mslp             : num  1028 1028 1028 1028 1029 ...
##  $ vsby             : num  10 10 10 10 10 10 10 10 10 10 ...
##  $ gust             : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ skyc1            : chr  "OVC" "OVC" "OVC" "OVC" ...
##  $ skyc2            : chr  NA NA NA NA ...
##  $ skyc3            : chr  NA NA NA NA ...
##  $ skyc4            : logi  NA NA NA NA NA NA ...
##  $ skyl1            : num  2800 2700 2600 2700 2100 2700 2700 2700 2600 2600 ...
##  $ skyl2            : num  NA NA NA NA 2700 NA NA NA NA NA ...
##  $ skyl3            : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ skyl4            : logi  NA NA NA NA NA NA ...
##  $ wxcodes          : chr  NA NA NA NA ...
##  $ ice_accretion_1hr: chr  NA NA NA NA ...
##  $ ice_accretion_3hr: chr  NA NA NA NA ...
##  $ ice_accretion_6hr: chr  NA NA NA NA ...
##  $ peak_wind_gust   : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ peak_wind_drct   : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ peak_wind_time   : POSIXct, format: NA NA ...
##  $ feel             : num  20.4 26.1 27 22.9 20.4 ...
##  $ metar            : chr  "KLNK 310054Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T10281067" "KLNK 310154Z 00000KT 10SM OVC027 M03/M07 A3030 RMK AO2 SLP277 T10331067" "KLNK 310254Z 00000KT 10SM OVC026 M03/M07 A3030 RMK AO2 SLP277 T10281067 51008" "KLNK 310354Z 28003KT 10SM OVC027 M03/M06 A3031 RMK AO2 SLP281 T10281061" ...
## 
## *** OBSERVATIONS EXPECTED BUT NOT RECORDED ***
##  [1] "2016-01-19 11:54:00 UTC" "2016-05-06 11:54:00 UTC"
##  [3] "2016-05-06 12:54:00 UTC" "2016-06-17 23:54:00 UTC"
##  [5] "2016-06-18 00:54:00 UTC" "2016-06-18 07:54:00 UTC"
##  [7] "2016-07-02 15:54:00 UTC" "2016-07-13 14:54:00 UTC"
##  [9] "2016-07-13 15:54:00 UTC" "2016-07-13 16:54:00 UTC"
## [11] "2016-07-13 17:54:00 UTC" "2016-07-30 13:54:00 UTC"
## [13] "2016-08-02 07:54:00 UTC" "2016-08-05 07:54:00 UTC"
## [15] "2016-08-29 21:54:00 UTC" "2016-09-15 16:54:00 UTC"
## [17] "2016-09-16 05:54:00 UTC" "2016-11-21 00:54:00 UTC"
## [19] "2016-12-03 08:54:00 UTC"
## 
## *** OBSERVATIONS RECORDED BUT NOT EXPECTED ***
## POSIXct of length 0
## 
## *** Are the extracted records unique? ***
## [1] TRUE
## 
## 
## *** First 6 winds and parsing ***
##      [,1]      [,2]  [,3] [,4]
## [1,] "30005KT" "300" "05" NA  
## [2,] "00000KT" "000" "00" NA  
## [3,] "00000KT" "000" "00" NA  
## [4,] "28003KT" "280" "03" NA  
## [5,] "31005KT" "310" "05" NA  
## [6,] "01009KT" "010" "09" NA  
## 
## *** Table of WIND DIRECTION ***
## 
##  000  010  020  030  040  050  060  070  080  090  100  110  120  130  140  150 
##  875  269  199  146  135  121  108   95   88   65  102  158  169  245  241  339 
##  160  170  180  190  200  210  220  230  240  250  260  270  280  290  300  310 
##  463  565  517  413  284  179  142   96   73   80  105   89  121  141  147  225 
##  320  330  340  350  360  VRB <NA> 
##  234  303  352  413  383  114   19 
## 
## *** Table of WIND SPEED ***
## 
##   00   03   04   05   06   07   08   09   10   11   12   13   14   15   16   17 
##  875  615  704  664  696  651  636  599  536  451  439  371  315  276  235  173 
##   18   19   20   21   22   23   24   25   26   27   28   29   30   31   32   34 
##  156  108   69   62   45   33   23   13   14    9   10    6    6    2    1    1 
## <NA> 
##   19 
## 
## *** Table of WIND GUST ***
## 
##  G14  G15  G16  G17  G18  G19  G20  G21  G22  G23  G24  G25  G26  G27  G28  G29 
##    6   11   11   16   30   59   69   87   83  107  101   83   62   61   61   37 
##  G30  G31  G32  G33  G34  G35  G36  G37  G38  G39  G40  G41  G42  G43  G45 <NA> 
##   28   24   18   13   12   14    6    6   10   11    2    3    1    2    2 7777 
## 
##  *** WIND DATA WAS NOT CAPTURED FROM: *** 
## # A tibble: 19 x 1
##    metar                                                                        
##    <chr>                                                                        
##  1 KLNK 291354Z 10SM CLR M01/M03 A2976 RMK AO2 SLP088 T10061028                 
##  2 KLNK 012154Z 10SM CLR 01/M08 A3018 RMK AO2 SLP234 T00061083                  
##  3 KLNK 201754Z 10SM CLR 06/M07 A3041 RMK AO2 SLP308 T00611067 10061 21039 58013
##  4 KLNK 221754Z 10SM CLR 16/03 A2955 RMK AO2 SLP000 T01610033 10161 20106 58006~
##  5 KLNK 221854Z 10SM CLR 17/03 A2954 RMK AO2 SLP000 T01670033 $                 
##  6 KLNK 050254Z 10SM CLR 14/03 A3004 RMK AO2 SLP169 T01390033 53007             
##  7 KLNK 181754Z 10SM OVC075 21/04 A3023 RMK AO2 SLP234 T02110039 10217 20072 58~
##  8 KLNK 011454Z 21/10 A3008 RMK AO2 SLP178 T02110100 51011 $                    
##  9 KLNK 152254Z 10SM CLR 38/11 A2978 RMK AO2 SLP070 T03780106                   
## 10 KLNK 181954Z 10SM SCT045 31/20 A3017 RMK AO2 SLP205 T03060200                
## 11 KLNK 261654Z 10SM CLR 29/18 A3014 RMK AO2 SLP192 T02890183 $                 
## 12 KLNK 261754Z 10SM CLR 31/18 A3012 RMK AO2 SLP188 T03060178 10306 20222       
## 13 KLNK 271754Z 10SM CLR 33/18 A3015 RMK AO2 SLP198 T03330183 10333 20222 55001 
## 14 KLNK 251754Z 10SM CLR 29/17 A3013 RMK AO2 SLP192 T02890167 10289 20194 58010 
## 15 KLNK 261854Z 10SM CLR 30/18 A3006 RMK AO2 SLP166 T03000183                   
## 16 KLNK 211854Z 10SM CLR 27/10 A3007 RMK AO2 SLP174 T02670100                   
## 17 KLNK 201454Z 10SM CLR 24/21 A3008 RMK AO2 SLP178 T02390206 56004 $           
## 18 KLNK 051854Z 10SM CLR 22/07 A3027 RMK AO2 SLP244 T02170067                   
## 19 KLNK 170754Z AUTO 10SM CLR 08/05 A2950 RMK AO2 SLP984 T00830050

## Warning: Removed 19 rows containing non-finite values (stat_count).

## Warning: Removed 4 rows containing missing values (geom_point).
## 
## *** Tentative Summary of Element Parsing *** 
## .
## FALSE  TRUE 
##    23  8790 
## 
## *** Data Not Matched *** 
##  [1] "KLNK 291354Z 10SM CLR M01/M03 A2976 RMK AO2 SLP088 T10061028"                   
##  [2] "KLNK 012154Z 10SM CLR 01/M08 A3018 RMK AO2 SLP234 T00061083"                    
##  [3] "KLNK 201754Z 10SM CLR 06/M07 A3041 RMK AO2 SLP308 T00611067 10061 21039 58013"  
##  [4] "KLNK 221654Z 19007KT CLR 15/03 A2956 RMK AO2 SLP006 T01500033 $"                
##  [5] "KLNK 221754Z 10SM CLR 16/03 A2955 RMK AO2 SLP000 T01610033 10161 20106 58006 $" 
##  [6] "KLNK 221854Z 10SM CLR 17/03 A2954 RMK AO2 SLP000 T01670033 $"                   
##  [7] "KLNK 050254Z 10SM CLR 14/03 A3004 RMK AO2 SLP169 T01390033 53007"               
##  [8] "KLNK 181754Z 10SM OVC075 21/04 A3023 RMK AO2 SLP234 T02110039 10217 20072 58011"
##  [9] "KLNK 011454Z 21/10 A3008 RMK AO2 SLP178 T02110100 51011 $"                      
## [10] "KLNK 011554Z 32007KT CLR 22/09 A3008 RMK AO2 SLP179 T02170089 $"                
## [11] "KLNK 152254Z 10SM CLR 38/11 A2978 RMK AO2 SLP070 T03780106"                     
## [12] "KLNK 181954Z 10SM SCT045 31/20 A3017 RMK AO2 SLP205 T03060200"                  
## [13] "KLNK 261554Z 05006KT 27/18 A3013 RMK AO2 SLP190 T02670178 RVRNO $"              
## [14] "KLNK 261654Z 10SM CLR 29/18 A3014 RMK AO2 SLP192 T02890183 $"                   
## [15] "KLNK 261754Z 10SM CLR 31/18 A3012 RMK AO2 SLP188 T03060178 10306 20222"         
## [16] "KLNK 271754Z 10SM CLR 33/18 A3015 RMK AO2 SLP198 T03330183 10333 20222 55001"   
## [17] "KLNK 251754Z 10SM CLR 29/17 A3013 RMK AO2 SLP192 T02890167 10289 20194 58010"   
## [18] "KLNK 261854Z 10SM CLR 30/18 A3006 RMK AO2 SLP166 T03000183"                     
## [19] "KLNK 211854Z 10SM CLR 27/10 A3007 RMK AO2 SLP174 T02670100"                     
## [20] "KLNK 201454Z 10SM CLR 24/21 A3008 RMK AO2 SLP178 T02390206 56004 $"             
## [21] "KLNK 201554Z 15014G19KT CLR 27/22 A3007 RMK AO2 SLP174 T02670217 $"             
## [22] "KLNK 051854Z 10SM CLR 22/07 A3027 RMK AO2 SLP244 T02170067"                     
## [23] "KLNK 170754Z AUTO 10SM CLR 08/05 A2950 RMK AO2 SLP984 T00830050"                
## 
## *** Parsing matrix summary *** 
## [1] 8813   11
##      [,1]                                                                   
## [1,] "54Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T10281067"       
## [2,] "54Z 00000KT 10SM OVC027 M03/M07 A3030 RMK AO2 SLP277 T10331067"       
## [3,] "54Z 00000KT 10SM OVC026 M03/M07 A3030 RMK AO2 SLP277 T10281067"       
## [4,] "54Z 28003KT 10SM OVC027 M03/M06 A3031 RMK AO2 SLP281 T10281061"       
## [5,] "54Z 31005KT 10SM SCT021 OVC027 M03/M07 A3033 RMK AO2 SLP286 T10281067"
## [6,] "54Z AUTO 01009KT 10SM OVC027 M06/M10 A3033 RMK AO2 SLP289 T10611100"  
##      [,2]  [,3] [,4] [,5] [,6]   [,7]  [,8]  [,9]    [,10]    [,11]      
## [1,] "300" "05" NA   " "  "10SM" "M03" "M07" "A3029" "SLP275" "T10281067"
## [2,] "000" "00" NA   " "  "10SM" "M03" "M07" "A3030" "SLP277" "T10331067"
## [3,] "000" "00" NA   " "  "10SM" "M03" "M07" "A3030" "SLP277" "T10281067"
## [4,] "280" "03" NA   " "  "10SM" "M03" "M06" "A3031" "SLP281" "T10281061"
## [5,] "310" "05" NA   " "  "10SM" "M03" "M07" "A3033" "SLP286" "T10281067"
## [6,] "010" "09" NA   " "  "10SM" "M06" "M10" "A3033" "SLP289" "T10611100"
## 
## *** Summary of the parsed data *** 
## Observations: 8,813
## Variables: 13
## $ METAR      <chr> "54Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T1...
## $ WindDir    <chr> "300", "000", "000", "280", "310", "010", "000", "010", ...
## $ WindSpeed  <chr> "05", "00", "00", "03", "05", "09", "00", "03", "00", "0...
## $ WindGust   <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, ...
## $ Dummy      <chr> " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", "...
## $ Visibility <chr> "10SM", "10SM", "10SM", "10SM", "10SM", "10SM", "10SM", ...
## $ TempC      <chr> "M03", "M03", "M03", "M03", "M03", "M06", "M07", "M08", ...
## $ DewC       <chr> "M07", "M07", "M07", "M06", "M07", "M10", "M11", "M11", ...
## $ Altimeter  <chr> "A3029", "A3030", "A3030", "A3031", "A3033", "A3033", "A...
## $ SLP        <chr> "SLP275", "SLP277", "SLP277", "SLP281", "SLP286", "SLP28...
## $ FahrC      <chr> "T10281067", "T10331067", "T10281067", "T10281061", "T10...
## $ dtime      <dttm> 2015-12-31 00:54:00, 2015-12-31 01:54:00, 2015-12-31 02...
## $ origMETAR  <chr> "KLNK 310054Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 ...
## Warning: NAs introduced by coercion

## 
##  *** Parsed data structure, head, tail, and random sample *** 
## Classes 'tbl_df', 'tbl' and 'data.frame':    8813 obs. of  15 variables:
##  $ METAR     : chr  "54Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T10281067" "54Z 00000KT 10SM OVC027 M03/M07 A3030 RMK AO2 SLP277 T10331067" "54Z 00000KT 10SM OVC026 M03/M07 A3030 RMK AO2 SLP277 T10281067" "54Z 28003KT 10SM OVC027 M03/M06 A3031 RMK AO2 SLP281 T10281061" ...
##  $ WindDir   : chr  "300" "000" "000" "280" ...
##  $ WindSpeed : int  5 0 0 3 5 9 0 3 0 0 ...
##  $ WindGust  : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ Dummy     : chr  " " " " " " " " ...
##  $ Visibility: num  10 10 10 10 10 10 10 10 10 10 ...
##  $ TempC     : int  -3 -3 -3 -3 -3 -6 -7 -8 -10 -9 ...
##  $ DewC      : int  -7 -7 -7 -6 -7 -10 -11 -11 -12 -12 ...
##  $ Altimeter : int  3029 3030 3030 3031 3033 3033 3034 3034 3034 3035 ...
##  $ SLP       : int  275 277 277 281 286 289 290 291 295 295 ...
##  $ FahrC     : chr  "T10281067" "T10331067" "T10281067" "T10281061" ...
##  $ dtime     : POSIXct, format: "2015-12-31 00:54:00" "2015-12-31 01:54:00" ...
##  $ origMETAR : chr  "KLNK 310054Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T10281067" "KLNK 310154Z 00000KT 10SM OVC027 M03/M07 A3030 RMK AO2 SLP277 T10331067" "KLNK 310254Z 00000KT 10SM OVC026 M03/M07 A3030 RMK AO2 SLP277 T10281067 51008" "KLNK 310354Z 28003KT 10SM OVC027 M03/M06 A3031 RMK AO2 SLP281 T10281061" ...
##  $ TempF     : num  27 26.1 27 27 27 ...
##  $ DewF      : num  19.9 19.9 19.9 21 19.9 ...
## # A tibble: 6 x 15
##   METAR WindDir WindSpeed WindGust Dummy Visibility TempC  DewC Altimeter   SLP
##   <chr> <chr>       <int>    <dbl> <chr>      <dbl> <int> <int>     <int> <int>
## 1 54Z ~ 300             5       NA " "           10    -3    -7      3029   275
## 2 54Z ~ 000             0       NA " "           10    -3    -7      3030   277
## 3 54Z ~ 000             0       NA " "           10    -3    -7      3030   277
## 4 54Z ~ 280             3       NA " "           10    -3    -6      3031   281
## 5 54Z ~ 310             5       NA " "           10    -3    -7      3033   286
## 6 54Z ~ 010             9       NA " "           10    -6   -10      3033   289
## # ... with 5 more variables: FahrC <chr>, dtime <dttm>, origMETAR <chr>,
## #   TempF <dbl>, DewF <dbl>
## # A tibble: 6 x 15
##   METAR WindDir WindSpeed WindGust Dummy Visibility TempC  DewC Altimeter   SLP
##   <chr> <chr>       <int>    <dbl> <chr>      <dbl> <int> <int>     <int> <int>
## 1 54Z ~ 160             6       NA " "           10     2    -4      2993   147
## 2 54Z ~ VRB             4       NA " "           10     4    -4      2990   139
## 3 54Z ~ 130             7       NA " "           10     4    -7      2989   134
## 4 54Z ~ 110             7       NA " "           10     4    -7      2988   130
## 5 54Z ~ 100            10       NA " "           10     3    -5      2986   125
## 6 54Z ~ 100            10       NA " "           10     3    -6      2986   123
## # ... with 5 more variables: FahrC <chr>, dtime <dttm>, origMETAR <chr>,
## #   TempF <dbl>, DewF <dbl>
## # A tibble: 20 x 15
##    METAR WindDir WindSpeed WindGust Dummy Visibility TempC  DewC Altimeter   SLP
##    <chr> <chr>       <int>    <dbl> <chr>      <dbl> <int> <int>     <int> <int>
##  1 54Z ~ 360             7       NA " "           10    21    15      2995   130
##  2 54Z ~ 210            13       NA " "           10    31     4      2988   108
##  3 54Z ~ 340            11       NA " "           10    -7   -13      3025   259
##  4 54Z ~ 240             9       NA " "           10    13     8      2961    18
##  5 54Z ~ 310             6       NA " "           10    -7   -19      3044   329
##  6 54Z ~ 040             7       NA " "           10    26    14      3010   179
##  7 54Z ~ 170            17       NA " "           10    31    22      2983    90
##  8 54Z ~ 160             7       NA " "           10    -1    -4      3002   182
##  9 54Z ~ 310             9       NA " "           10    13     9      3021   226
## 10 54Z ~ 110            10       NA " "           10    16    16      2990   116
## 11 54Z ~ 140             5       NA " "           10    19    18      3013   199
## 12 54Z ~ 110             6       NA " "           10    20     4      3012   197
## 13 54Z ~ 220             4       NA " "           10     3    -4      3042   311
## 14 54Z ~ 350             3       NA " "            8     9     8      3011   194
## 15 54Z ~ 080             8       NA " "           10   -12   -18      3046   340
## 16 54Z ~ 320             7       NA " "           10    16    11      2998   144
## 17 54Z ~ 330             6       NA " "           10     9     5      3016   214
## 18 54Z ~ 340            13       NA " "            3     0    -1      2979    99
## 19 54Z ~ 190             5       NA " "           10     3    -4      3024   251
## 20 54Z ~ 320            18       NA " "           10    24    12      2999   147
## # ... with 5 more variables: FahrC <chr>, dtime <dttm>, origMETAR <chr>,
## #   TempF <dbl>, DewF <dbl>
## 
##  *** Number of NA values *** 
##      METAR    WindDir  WindSpeed   WindGust      Dummy Visibility      TempC 
##         23         23         23       8813         23         23         23 
##       DewC  Altimeter        SLP      FahrC      dtime  origMETAR      TempF 
##         23         23         23         23          0          0         23 
##       DewF 
##         23

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf
## Warning in min(diff(sort(x))): no non-missing arguments to min; returning Inf
## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## # A tibble: 18 x 2
##    Visibility     n
##         <dbl> <int>
##  1       0.25    20
##  2       0.5     16
##  3       0.75    15
##  4       1       19
##  5       1.25    13
##  6       1.5     17
##  7       1.75    21
##  8       2       50
##  9       2.5     38
## 10       3       70
## 11       4      108
## 12       5      108
## 13       6      146
## 14       7      189
## 15       8      221
## 16       9      290
## 17      10     7449
## 18      NA       23
##    WindGust    n
## 1        14    4
## 2        15   11
## 3        16   11
## 4        17   16
## 5        18   29
## 6        19   59
## 7        20   69
## 8        21   87
## 9        22   83
## 10       23  107
## 11       24  101
## 12       25   83
## 13       26   62
## 14       27   61
## 15       28   61
## 16       29   37
## 17       30   28
## 18       31   24
## 19       32   18
## 20       33   13
## 21       34   12
## 22       35   14
## 23       36    6
## 24       37    6
## 25       38   10
## 26       39   11
## 27       40    2
## 28       41    3
## 29       42    1
## 30       43    2
## 31       45    2
## 32       NA 7780
## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## 
##  *** Correlations use 8790 complete cases (99.7% of 8813 total) ***
##            TempC TempF  DewC  DewF Altimeter modSLP WindSpeed Visibility
## TempC       1.00  1.00  0.91  0.91     -0.38  -0.48      0.13       0.19
## TempF       1.00  1.00  0.91  0.91     -0.38  -0.48      0.13       0.19
## DewC        0.91  0.91  1.00  1.00     -0.38  -0.48     -0.01       0.07
## DewF        0.91  0.91  1.00  1.00     -0.38  -0.48     -0.01       0.07
## Altimeter  -0.38 -0.38 -0.38 -0.38      1.00   0.99     -0.26       0.07
## modSLP     -0.48 -0.48 -0.48 -0.48      0.99   1.00     -0.25       0.04
## WindSpeed   0.13  0.13 -0.01 -0.01     -0.26  -0.25      1.00      -0.01
## Visibility  0.19  0.19  0.07  0.07      0.07   0.04     -0.01       1.00

## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

## 
##  *** Regression call is: modSLP ~ Altimeter ***
## 
## Call:
## lm(formula = formula(myChar), data = met)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.8890 -0.8172 -0.1578  0.7610  2.7890 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -6.233e+01  1.406e+00  -44.34   <2e-16 ***
## Altimeter    3.594e-01  4.683e-04  767.38   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9691 on 8788 degrees of freedom
##   (23 observations deleted due to missingness)
## Multiple R-squared:  0.9853, Adjusted R-squared:  0.9853 
## F-statistic: 5.889e+05 on 1 and 8788 DF,  p-value: < 2.2e-16
## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

## 
##  *** Regression call is: modSLP ~ Altimeter + TempF ***
## 
## Call:
## lm(formula = formula(myChar), data = met)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.24130 -0.26737  0.00686  0.25214  1.23326 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -1.002e+01  5.615e-01  -17.84   <2e-16 ***
## Altimeter    3.428e-01  1.857e-04 1845.63   <2e-16 ***
## TempF       -4.559e-02  1.921e-04 -237.31   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3561 on 8787 degrees of freedom
##   (23 observations deleted due to missingness)
## Multiple R-squared:  0.998,  Adjusted R-squared:  0.998 
## F-statistic: 2.209e+06 on 2 and 8787 DF,  p-value: < 2.2e-16
## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

## 
## *** Counts by number of layers of each cloud type ***
##    isCLR isVV numFEW numSCT numBKN numOVC    n
## 1      0    0      0      0      0      0    6
## 2      0    0      0      0      0      1 1389
## 3      0    0      0      0      1      0  307
## 4      0    0      0      0      1      1  250
## 5      0    0      0      0      2      0   50
## 6      0    0      0      0      2      1   45
## 7      0    0      0      0      3      0    8
## 8      0    0      0      1      0      0  230
## 9      0    0      0      1      0      1   73
## 10     0    0      0      1      1      0   52
## 11     0    0      0      1      1      1   42
## 12     0    0      0      1      2      0   17
## 13     0    0      0      2      0      0   16
## 14     0    0      0      2      0      1    9
## 15     0    0      0      2      1      0    6
## 16     0    0      1      0      0      0  380
## 17     0    0      1      0      0      1   90
## 18     0    0      1      0      1      0   46
## 19     0    0      1      0      1      1   62
## 20     0    0      1      0      2      0    9
## 21     0    0      1      1      0      0   39
## 22     0    0      1      1      0      1   24
## 23     0    0      1      1      1      0   28
## 24     0    0      1      2      0      0   10
## 25     0    0      2      0      0      0   24
## 26     0    0      2      0      0      1    6
## 27     0    0      2      0      1      0    5
## 28     0    0      2      1      0      0    3
## 29     0    0      3      0      0      0    2
## 30     0    1      0      0      0      0   33
## 31     1    0      0      0      0      0 5552
## 
## *** METAR records where no clouds were extracted ***
## [1] "KLNK 301854Z 17011KT 10SM 30/18 A2993 RMK AO2 SLP124 T03000178 $"                           
## [2] "KLNK 011454Z 21/10 A3008 RMK AO2 SLP178 T02110100 51011 $"                                  
## [3] "KLNK 261554Z 05006KT 27/18 A3013 RMK AO2 SLP190 T02670178 RVRNO $"                          
## [4] "KLNK 161554Z 09009KT 10SM 26/19 A3007 RMK AO2 SLP171 T02610194 $"                           
## [5] "KLNK 211954Z 25008KT 210V270 10SM 27/10 A3005 RMK AO2 SLP170 T02720100 $"                   
## [6] "KLNK 050854Z 00000KT 1/2SM R36/3000VP6000FT FG 09/09 A2978 RMK AO2 SLP081 T00940094 51008 $"

## 
## *** Dimensions for the cloud matrix ***
## [1] 8813   11
## List of 2
##  $ tblClouds:Classes 'tbl_df', 'tbl' and 'data.frame':   8813 obs. of  7 variables:
##   ..$ isCLR : num [1:8813] 0 0 0 0 0 0 0 0 0 1 ...
##   ..$ isVV  : num [1:8813] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ htVV  : num [1:8813] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ numFEW: int [1:8813] 0 0 0 0 0 0 0 0 1 0 ...
##   ..$ numSCT: int [1:8813] 0 0 0 0 1 0 0 0 0 0 ...
##   ..$ numBKN: int [1:8813] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ numOVC: int [1:8813] 1 1 1 1 1 1 1 1 0 0 ...
##  $ mtxCloud : chr [1:8813, 1:11] "" "" "" "" ...
## 
## *** Lowest clouds by type tibble ***
## # A tibble: 8,813 x 5
##    lowVV lowOVC lowBKN lowSCT lowFEW
##    <dbl>  <dbl>  <dbl>  <dbl>  <dbl>
##  1    NA   2800     NA     NA     NA
##  2    NA   2700     NA     NA     NA
##  3    NA   2600     NA     NA     NA
##  4    NA   2700     NA     NA     NA
##  5    NA   2700     NA   2100     NA
##  6    NA   2700     NA     NA     NA
##  7    NA   2700     NA     NA     NA
##  8    NA   2700     NA     NA     NA
##  9    NA     NA     NA     NA   2600
## 10    NA     NA     NA     NA     NA
## # ... with 8,803 more rows

## List of 3
##  $ lowCloud       :Classes 'tbl_df', 'tbl' and 'data.frame': 8813 obs. of  5 variables:
##   ..$ lowVV : num [1:8813] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ lowOVC: num [1:8813] 2800 2700 2600 2700 2700 2700 2700 2700 NA NA ...
##   ..$ lowBKN: num [1:8813] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ lowSCT: num [1:8813] NA NA NA NA 2100 NA NA NA NA NA ...
##   ..$ lowFEW: num [1:8813] NA NA NA NA NA NA NA NA 2600 NA ...
##  $ minCeilingLevel: num [1:8813] 2800 2700 2600 2700 2700 ...
##  $ minCloudLevel  : num [1:8813] 2800 2700 2600 2700 2100 ...
## Classes 'tbl_df', 'tbl' and 'data.frame':    8813 obs. of  30 variables:
##  $ METAR     : chr  "54Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T10281067" "54Z 00000KT 10SM OVC027 M03/M07 A3030 RMK AO2 SLP277 T10331067" "54Z 00000KT 10SM OVC026 M03/M07 A3030 RMK AO2 SLP277 T10281067" "54Z 28003KT 10SM OVC027 M03/M06 A3031 RMK AO2 SLP281 T10281061" ...
##  $ WindDir   : chr  "300" "000" "000" "280" ...
##  $ WindSpeed : int  5 0 0 3 5 9 0 3 0 0 ...
##  $ WindGust  : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ Dummy     : chr  " " " " " " " " ...
##  $ Visibility: num  10 10 10 10 10 10 10 10 10 10 ...
##  $ TempC     : int  -3 -3 -3 -3 -3 -6 -7 -8 -10 -9 ...
##  $ DewC      : int  -7 -7 -7 -6 -7 -10 -11 -11 -12 -12 ...
##  $ Altimeter : int  3029 3030 3030 3031 3033 3033 3034 3034 3034 3035 ...
##  $ SLP       : int  275 277 277 281 286 289 290 291 295 295 ...
##  $ FahrC     : chr  "T10281067" "T10331067" "T10281067" "T10281061" ...
##  $ dtime     : POSIXct, format: "2015-12-31 00:54:00" "2015-12-31 01:54:00" ...
##  $ origMETAR : chr  "KLNK 310054Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T10281067" "KLNK 310154Z 00000KT 10SM OVC027 M03/M07 A3030 RMK AO2 SLP277 T10331067" "KLNK 310254Z 00000KT 10SM OVC026 M03/M07 A3030 RMK AO2 SLP277 T10281067 51008" "KLNK 310354Z 28003KT 10SM OVC027 M03/M06 A3031 RMK AO2 SLP281 T10281061" ...
##  $ TempF     : num  27 26.1 27 27 27 ...
##  $ DewF      : num  19.9 19.9 19.9 21 19.9 ...
##  $ modSLP    : num  1028 1028 1028 1028 1029 ...
##  $ isCLR     : num  0 0 0 0 0 0 0 0 0 1 ...
##  $ isVV      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ htVV      : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ numFEW    : int  0 0 0 0 0 0 0 0 1 0 ...
##  $ numSCT    : int  0 0 0 0 1 0 0 0 0 0 ...
##  $ numBKN    : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ numOVC    : int  1 1 1 1 1 1 1 1 0 0 ...
##  $ lowVV     : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ lowOVC    : num  2800 2700 2600 2700 2700 2700 2700 2700 NA NA ...
##  $ lowBKN    : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ lowSCT    : num  NA NA NA NA 2100 NA NA NA NA NA ...
##  $ lowFEW    : num  NA NA NA NA NA NA NA NA 2600 NA ...
##  $ wType     : Factor w/ 7 levels "VV","OVC","BKN",..: 2 2 2 2 2 2 2 2 5 6 ...
##  $ month     : Factor w/ 12 levels "Jan","Feb","Mar",..: 12 12 12 12 12 12 12 12 12 12 ...

str(klnk2016METAR)
## List of 8
##  $ fullMETAR      :Classes 'tbl_df', 'tbl' and 'data.frame': 8813 obs. of  30 variables:
##   ..$ METAR     : chr [1:8813] "54Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T10281067" "54Z 00000KT 10SM OVC027 M03/M07 A3030 RMK AO2 SLP277 T10331067" "54Z 00000KT 10SM OVC026 M03/M07 A3030 RMK AO2 SLP277 T10281067" "54Z 28003KT 10SM OVC027 M03/M06 A3031 RMK AO2 SLP281 T10281061" ...
##   ..$ WindDir   : chr [1:8813] "300" "000" "000" "280" ...
##   ..$ WindSpeed : int [1:8813] 5 0 0 3 5 9 0 3 0 0 ...
##   ..$ WindGust  : num [1:8813] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ Dummy     : chr [1:8813] " " " " " " " " ...
##   ..$ Visibility: num [1:8813] 10 10 10 10 10 10 10 10 10 10 ...
##   ..$ TempC     : int [1:8813] -3 -3 -3 -3 -3 -6 -7 -8 -10 -9 ...
##   ..$ DewC      : int [1:8813] -7 -7 -7 -6 -7 -10 -11 -11 -12 -12 ...
##   ..$ Altimeter : int [1:8813] 3029 3030 3030 3031 3033 3033 3034 3034 3034 3035 ...
##   ..$ SLP       : int [1:8813] 275 277 277 281 286 289 290 291 295 295 ...
##   ..$ FahrC     : chr [1:8813] "T10281067" "T10331067" "T10281067" "T10281061" ...
##   ..$ dtime     : POSIXct[1:8813], format: "2015-12-31 00:54:00" "2015-12-31 01:54:00" ...
##   ..$ origMETAR : chr [1:8813] "KLNK 310054Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T10281067" "KLNK 310154Z 00000KT 10SM OVC027 M03/M07 A3030 RMK AO2 SLP277 T10331067" "KLNK 310254Z 00000KT 10SM OVC026 M03/M07 A3030 RMK AO2 SLP277 T10281067 51008" "KLNK 310354Z 28003KT 10SM OVC027 M03/M06 A3031 RMK AO2 SLP281 T10281061" ...
##   ..$ TempF     : num [1:8813] 27 26.1 27 27 27 ...
##   ..$ DewF      : num [1:8813] 19.9 19.9 19.9 21 19.9 ...
##   ..$ modSLP    : num [1:8813] 1028 1028 1028 1028 1029 ...
##   ..$ isCLR     : num [1:8813] 0 0 0 0 0 0 0 0 0 1 ...
##   ..$ isVV      : num [1:8813] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ htVV      : num [1:8813] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ numFEW    : int [1:8813] 0 0 0 0 0 0 0 0 1 0 ...
##   ..$ numSCT    : int [1:8813] 0 0 0 0 1 0 0 0 0 0 ...
##   ..$ numBKN    : int [1:8813] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ numOVC    : int [1:8813] 1 1 1 1 1 1 1 1 0 0 ...
##   ..$ lowVV     : num [1:8813] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ lowOVC    : num [1:8813] 2800 2700 2600 2700 2700 2700 2700 2700 NA NA ...
##   ..$ lowBKN    : num [1:8813] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ lowSCT    : num [1:8813] NA NA NA NA 2100 NA NA NA NA NA ...
##   ..$ lowFEW    : num [1:8813] NA NA NA NA NA NA NA NA 2600 NA ...
##   ..$ wType     : Factor w/ 7 levels "VV","OVC","BKN",..: 2 2 2 2 2 2 2 2 5 6 ...
##   ..$ month     : Factor w/ 12 levels "Jan","Feb","Mar",..: 12 12 12 12 12 12 12 12 12 12 ...
##  $ funcMETAR      :Classes 'tbl_df', 'tbl' and 'data.frame': 8813 obs. of  29 variables:
##   ..$ station          : chr [1:8813] "LNK" "LNK" "LNK" "LNK" ...
##   ..$ valid            : POSIXct[1:8813], format: "2015-12-31 00:54:00" "2015-12-31 01:54:00" ...
##   ..$ tmpf             : num [1:8813] 27 26.1 27 27 27 ...
##   ..$ dwpf             : num [1:8813] 19.9 19.9 19.9 21 19.9 ...
##   ..$ relh             : num [1:8813] 74.5 77.3 74.5 78 74.5 ...
##   ..$ drct             : num [1:8813] 300 0 0 280 310 10 0 10 0 0 ...
##   ..$ sknt             : num [1:8813] 5 0 0 3 5 9 0 3 0 0 ...
##   ..$ p01i             : chr [1:8813] "0.00" "0.00" "0.00" "0.00" ...
##   ..$ alti             : num [1:8813] 30.3 30.3 30.3 30.3 30.3 ...
##   ..$ mslp             : num [1:8813] 1028 1028 1028 1028 1029 ...
##   ..$ vsby             : num [1:8813] 10 10 10 10 10 10 10 10 10 10 ...
##   ..$ gust             : num [1:8813] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ skyc1            : chr [1:8813] "OVC" "OVC" "OVC" "OVC" ...
##   ..$ skyc2            : chr [1:8813] NA NA NA NA ...
##   ..$ skyc3            : chr [1:8813] NA NA NA NA ...
##   ..$ skyc4            : logi [1:8813] NA NA NA NA NA NA ...
##   ..$ skyl1            : num [1:8813] 2800 2700 2600 2700 2100 2700 2700 2700 2600 NA ...
##   ..$ skyl2            : num [1:8813] NA NA NA NA 2700 NA NA NA NA NA ...
##   ..$ skyl3            : num [1:8813] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ skyl4            : logi [1:8813] NA NA NA NA NA NA ...
##   ..$ wxcodes          : chr [1:8813] NA NA NA NA ...
##   ..$ ice_accretion_1hr: chr [1:8813] NA NA NA NA ...
##   ..$ ice_accretion_3hr: chr [1:8813] NA NA NA NA ...
##   ..$ ice_accretion_6hr: chr [1:8813] NA NA NA NA ...
##   ..$ peak_wind_gust   : num [1:8813] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ peak_wind_drct   : num [1:8813] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ peak_wind_time   : POSIXct[1:8813], format: NA NA ...
##   ..$ feel             : num [1:8813] 20.4 26.1 27 22.9 20.4 ...
##   ..$ metar            : chr [1:8813] "KLNK 310054Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T10281067" "KLNK 310154Z 00000KT 10SM OVC027 M03/M07 A3030 RMK AO2 SLP277 T10331067" "KLNK 310254Z 00000KT 10SM OVC026 M03/M07 A3030 RMK AO2 SLP277 T10281067 51008" "KLNK 310354Z 28003KT 10SM OVC027 M03/M06 A3031 RMK AO2 SLP281 T10281061" ...
##   ..- attr(*, "spec")=
##   .. .. cols(
##   .. ..   station = col_character(),
##   .. ..   valid = col_datetime(format = ""),
##   .. ..   tmpf = col_double(),
##   .. ..   dwpf = col_double(),
##   .. ..   relh = col_double(),
##   .. ..   drct = col_double(),
##   .. ..   sknt = col_double(),
##   .. ..   p01i = col_character(),
##   .. ..   alti = col_double(),
##   .. ..   mslp = col_double(),
##   .. ..   vsby = col_double(),
##   .. ..   gust = col_double(),
##   .. ..   skyc1 = col_character(),
##   .. ..   skyc2 = col_character(),
##   .. ..   skyc3 = col_character(),
##   .. ..   skyc4 = col_logical(),
##   .. ..   skyl1 = col_double(),
##   .. ..   skyl2 = col_double(),
##   .. ..   skyl3 = col_double(),
##   .. ..   skyl4 = col_logical(),
##   .. ..   wxcodes = col_character(),
##   .. ..   ice_accretion_1hr = col_character(),
##   .. ..   ice_accretion_3hr = col_character(),
##   .. ..   ice_accretion_6hr = col_character(),
##   .. ..   peak_wind_gust = col_double(),
##   .. ..   peak_wind_drct = col_double(),
##   .. ..   peak_wind_time = col_datetime(format = ""),
##   .. ..   feel = col_double(),
##   .. ..   metar = col_character()
##   .. .. )
##  $ windMETAR      :Classes 'tbl_df', 'tbl' and 'data.frame': 8813 obs. of  32 variables:
##   ..$ station          : chr [1:8813] "LNK" "LNK" "LNK" "LNK" ...
##   ..$ valid            : POSIXct[1:8813], format: "2015-12-31 00:54:00" "2015-12-31 01:54:00" ...
##   ..$ tmpf             : num [1:8813] 27 26.1 27 27 27 ...
##   ..$ dwpf             : num [1:8813] 19.9 19.9 19.9 21 19.9 ...
##   ..$ relh             : num [1:8813] 74.5 77.3 74.5 78 74.5 ...
##   ..$ drct             : num [1:8813] 300 0 0 280 310 10 0 10 0 0 ...
##   ..$ sknt             : num [1:8813] 5 0 0 3 5 9 0 3 0 0 ...
##   ..$ p01i             : chr [1:8813] "0.00" "0.00" "0.00" "0.00" ...
##   ..$ alti             : num [1:8813] 30.3 30.3 30.3 30.3 30.3 ...
##   ..$ mslp             : num [1:8813] 1028 1028 1028 1028 1029 ...
##   ..$ vsby             : num [1:8813] 10 10 10 10 10 10 10 10 10 10 ...
##   ..$ gust             : num [1:8813] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ skyc1            : chr [1:8813] "OVC" "OVC" "OVC" "OVC" ...
##   ..$ skyc2            : chr [1:8813] NA NA NA NA ...
##   ..$ skyc3            : chr [1:8813] NA NA NA NA ...
##   ..$ skyc4            : logi [1:8813] NA NA NA NA NA NA ...
##   ..$ skyl1            : num [1:8813] 2800 2700 2600 2700 2100 2700 2700 2700 2600 NA ...
##   ..$ skyl2            : num [1:8813] NA NA NA NA 2700 NA NA NA NA NA ...
##   ..$ skyl3            : num [1:8813] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ skyl4            : logi [1:8813] NA NA NA NA NA NA ...
##   ..$ wxcodes          : chr [1:8813] NA NA NA NA ...
##   ..$ ice_accretion_1hr: chr [1:8813] NA NA NA NA ...
##   ..$ ice_accretion_3hr: chr [1:8813] NA NA NA NA ...
##   ..$ ice_accretion_6hr: chr [1:8813] NA NA NA NA ...
##   ..$ peak_wind_gust   : num [1:8813] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ peak_wind_drct   : num [1:8813] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ peak_wind_time   : POSIXct[1:8813], format: NA NA ...
##   ..$ feel             : num [1:8813] 20.4 26.1 27 22.9 20.4 ...
##   ..$ metar            : chr [1:8813] "KLNK 310054Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T10281067" "KLNK 310154Z 00000KT 10SM OVC027 M03/M07 A3030 RMK AO2 SLP277 T10331067" "KLNK 310254Z 00000KT 10SM OVC026 M03/M07 A3030 RMK AO2 SLP277 T10281067 51008" "KLNK 310354Z 28003KT 10SM OVC027 M03/M06 A3031 RMK AO2 SLP281 T10281061" ...
##   ..$ dirW             : chr [1:8813] "300" "000" "000" "280" ...
##   ..$ spdW             : num [1:8813] 5 0 0 3 5 9 0 3 0 0 ...
##   ..$ gustW            : num [1:8813] NA NA NA NA NA NA NA NA NA NA ...
##  $ initMETAR      :Classes 'tbl_df', 'tbl' and 'data.frame': 8813 obs. of  13 variables:
##   ..$ METAR     : chr [1:8813] "54Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T10281067" "54Z 00000KT 10SM OVC027 M03/M07 A3030 RMK AO2 SLP277 T10331067" "54Z 00000KT 10SM OVC026 M03/M07 A3030 RMK AO2 SLP277 T10281067" "54Z 28003KT 10SM OVC027 M03/M06 A3031 RMK AO2 SLP281 T10281061" ...
##   ..$ WindDir   : chr [1:8813] "300" "000" "000" "280" ...
##   ..$ WindSpeed : chr [1:8813] "05" "00" "00" "03" ...
##   ..$ WindGust  : chr [1:8813] NA NA NA NA ...
##   ..$ Dummy     : chr [1:8813] " " " " " " " " ...
##   ..$ Visibility: chr [1:8813] "10SM" "10SM" "10SM" "10SM" ...
##   ..$ TempC     : chr [1:8813] "M03" "M03" "M03" "M03" ...
##   ..$ DewC      : chr [1:8813] "M07" "M07" "M07" "M06" ...
##   ..$ Altimeter : chr [1:8813] "A3029" "A3030" "A3030" "A3031" ...
##   ..$ SLP       : chr [1:8813] "SLP275" "SLP277" "SLP277" "SLP281" ...
##   ..$ FahrC     : chr [1:8813] "T10281067" "T10331067" "T10281067" "T10281061" ...
##   ..$ dtime     : POSIXct[1:8813], format: "2015-12-31 00:54:00" "2015-12-31 01:54:00" ...
##   ..$ origMETAR : chr [1:8813] "KLNK 310054Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T10281067" "KLNK 310154Z 00000KT 10SM OVC027 M03/M07 A3030 RMK AO2 SLP277 T10331067" "KLNK 310254Z 00000KT 10SM OVC026 M03/M07 A3030 RMK AO2 SLP277 T10281067 51008" "KLNK 310354Z 28003KT 10SM OVC027 M03/M06 A3031 RMK AO2 SLP281 T10281061" ...
##  $ convMETAR      :Classes 'tbl_df', 'tbl' and 'data.frame': 8813 obs. of  15 variables:
##   ..$ METAR     : chr [1:8813] "54Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T10281067" "54Z 00000KT 10SM OVC027 M03/M07 A3030 RMK AO2 SLP277 T10331067" "54Z 00000KT 10SM OVC026 M03/M07 A3030 RMK AO2 SLP277 T10281067" "54Z 28003KT 10SM OVC027 M03/M06 A3031 RMK AO2 SLP281 T10281061" ...
##   ..$ WindDir   : chr [1:8813] "300" "000" "000" "280" ...
##   ..$ WindSpeed : int [1:8813] 5 0 0 3 5 9 0 3 0 0 ...
##   ..$ WindGust  : num [1:8813] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ Dummy     : chr [1:8813] " " " " " " " " ...
##   ..$ Visibility: num [1:8813] 10 10 10 10 10 10 10 10 10 10 ...
##   ..$ TempC     : int [1:8813] -3 -3 -3 -3 -3 -6 -7 -8 -10 -9 ...
##   ..$ DewC      : int [1:8813] -7 -7 -7 -6 -7 -10 -11 -11 -12 -12 ...
##   ..$ Altimeter : int [1:8813] 3029 3030 3030 3031 3033 3033 3034 3034 3034 3035 ...
##   ..$ SLP       : int [1:8813] 275 277 277 281 286 289 290 291 295 295 ...
##   ..$ FahrC     : chr [1:8813] "T10281067" "T10331067" "T10281067" "T10281061" ...
##   ..$ dtime     : POSIXct[1:8813], format: "2015-12-31 00:54:00" "2015-12-31 01:54:00" ...
##   ..$ origMETAR : chr [1:8813] "KLNK 310054Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T10281067" "KLNK 310154Z 00000KT 10SM OVC027 M03/M07 A3030 RMK AO2 SLP277 T10331067" "KLNK 310254Z 00000KT 10SM OVC026 M03/M07 A3030 RMK AO2 SLP277 T10281067 51008" "KLNK 310354Z 28003KT 10SM OVC027 M03/M06 A3031 RMK AO2 SLP281 T10281061" ...
##   ..$ TempF     : num [1:8813] 27 26.1 27 27 27 ...
##   ..$ DewF      : num [1:8813] 19.9 19.9 19.9 21 19.9 ...
##  $ parseMETAR     :Classes 'tbl_df', 'tbl' and 'data.frame': 8813 obs. of  16 variables:
##   ..$ METAR     : chr [1:8813] "54Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T10281067" "54Z 00000KT 10SM OVC027 M03/M07 A3030 RMK AO2 SLP277 T10331067" "54Z 00000KT 10SM OVC026 M03/M07 A3030 RMK AO2 SLP277 T10281067" "54Z 28003KT 10SM OVC027 M03/M06 A3031 RMK AO2 SLP281 T10281061" ...
##   ..$ WindDir   : chr [1:8813] "300" "000" "000" "280" ...
##   ..$ WindSpeed : int [1:8813] 5 0 0 3 5 9 0 3 0 0 ...
##   ..$ WindGust  : num [1:8813] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ Dummy     : chr [1:8813] " " " " " " " " ...
##   ..$ Visibility: num [1:8813] 10 10 10 10 10 10 10 10 10 10 ...
##   ..$ TempC     : int [1:8813] -3 -3 -3 -3 -3 -6 -7 -8 -10 -9 ...
##   ..$ DewC      : int [1:8813] -7 -7 -7 -6 -7 -10 -11 -11 -12 -12 ...
##   ..$ Altimeter : int [1:8813] 3029 3030 3030 3031 3033 3033 3034 3034 3034 3035 ...
##   ..$ SLP       : int [1:8813] 275 277 277 281 286 289 290 291 295 295 ...
##   ..$ FahrC     : chr [1:8813] "T10281067" "T10331067" "T10281067" "T10281061" ...
##   ..$ dtime     : POSIXct[1:8813], format: "2015-12-31 00:54:00" "2015-12-31 01:54:00" ...
##   ..$ origMETAR : chr [1:8813] "KLNK 310054Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T10281067" "KLNK 310154Z 00000KT 10SM OVC027 M03/M07 A3030 RMK AO2 SLP277 T10331067" "KLNK 310254Z 00000KT 10SM OVC026 M03/M07 A3030 RMK AO2 SLP277 T10281067 51008" "KLNK 310354Z 28003KT 10SM OVC027 M03/M06 A3031 RMK AO2 SLP281 T10281061" ...
##   ..$ TempF     : num [1:8813] 27 26.1 27 27 27 ...
##   ..$ DewF      : num [1:8813] 19.9 19.9 19.9 21 19.9 ...
##   ..$ modSLP    : num [1:8813] 1028 1028 1028 1028 1029 ...
##  $ initClouds     :List of 2
##   ..$ tblClouds:Classes 'tbl_df', 'tbl' and 'data.frame':    8813 obs. of  7 variables:
##   .. ..$ isCLR : num [1:8813] 0 0 0 0 0 0 0 0 0 1 ...
##   .. ..$ isVV  : num [1:8813] 0 0 0 0 0 0 0 0 0 0 ...
##   .. ..$ htVV  : num [1:8813] NA NA NA NA NA NA NA NA NA NA ...
##   .. ..$ numFEW: int [1:8813] 0 0 0 0 0 0 0 0 1 0 ...
##   .. ..$ numSCT: int [1:8813] 0 0 0 0 1 0 0 0 0 0 ...
##   .. ..$ numBKN: int [1:8813] 0 0 0 0 0 0 0 0 0 0 ...
##   .. ..$ numOVC: int [1:8813] 1 1 1 1 1 1 1 1 0 0 ...
##   ..$ mtxCloud : chr [1:8813, 1:11] "" "" "" "" ...
##  $ processedClouds:List of 3
##   ..$ lowCloud       :Classes 'tbl_df', 'tbl' and 'data.frame':  8813 obs. of  5 variables:
##   .. ..$ lowVV : num [1:8813] NA NA NA NA NA NA NA NA NA NA ...
##   .. ..$ lowOVC: num [1:8813] 2800 2700 2600 2700 2700 2700 2700 2700 NA NA ...
##   .. ..$ lowBKN: num [1:8813] NA NA NA NA NA NA NA NA NA NA ...
##   .. ..$ lowSCT: num [1:8813] NA NA NA NA 2100 NA NA NA NA NA ...
##   .. ..$ lowFEW: num [1:8813] NA NA NA NA NA NA NA NA 2600 NA ...
##   ..$ minCeilingLevel: num [1:8813] 2800 2700 2600 2700 2700 ...
##   ..$ minCloudLevel  : num [1:8813] 2800 2700 2600 2700 2100 ...

Example #23: Running for a Different Station

The functions can be run for a different station.

Example code includes:

# Set key parameters for reading and interpreting METAR
fname <- "./RInputFiles/metar_kord_2016.txt"  # file name for raw METAR data
timeZ <- "51Z"  # Zulu time that METAR is recorded at this station
expMin <- as.POSIXct("2015-12-31 00:51:00", tz="UTC")  # Expected first time read
expDays <- 368  # Expected total days read
locMET <- "Chicago, IL"  # Description of city or location
shortMET <- "KORD METAR (2016)"  # Station code and timing
longMET <- "Chicago, IL O'Hare Hourly METAR (2016)"  # Description of city or location and timing

# Extraction format for METAR - paste the expected Zulu time at the front
valMet <- paste0(timeZ, ".*?(VRB|\\d{3})(\\d{2})(G\\d{2})?KT(.*?)(\\d{1,2}SM).*?\\s(M?\\d{2})/(M?\\d{2}).*?(A\\d{4}).*?RMK.*?(SLP\\d{3}).*?(T\\d{8})")

# Run the process for Lincoln, NE
kord2016METAR <- runAllMETAR(fname=fname, timeZ=timeZ, expMin=expMin, expDays=expDays, 
                             locMET=locMET, shortMET=shortMET, longMET=longMET, valMet=valMet
                             )
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   station = col_character(),
##   valid = col_datetime(format = ""),
##   p01i = col_character(),
##   skyc1 = col_character(),
##   skyc2 = col_character(),
##   skyc3 = col_character(),
##   skyc4 = col_character(),
##   wxcodes = col_character(),
##   ice_accretion_1hr = col_logical(),
##   ice_accretion_3hr = col_logical(),
##   ice_accretion_6hr = col_logical(),
##   peak_wind_time = col_datetime(format = ""),
##   metar = col_character()
## )
## See spec(...) for full column specifications.
## Warning: 22 parsing failures.
##  row               col           expected actual                                file
## 1678 ice_accretion_1hr 1/0/T/F/TRUE/FALSE   0.01 './RInputFiles/metar_kord_2016.txt'
## 1680 ice_accretion_6hr 1/0/T/F/TRUE/FALSE   0.01 './RInputFiles/metar_kord_2016.txt'
## 1902 ice_accretion_1hr 1/0/T/F/TRUE/FALSE   0.01 './RInputFiles/metar_kord_2016.txt'
## 1903 ice_accretion_1hr 1/0/T/F/TRUE/FALSE   0.01 './RInputFiles/metar_kord_2016.txt'
## 1903 ice_accretion_6hr 1/0/T/F/TRUE/FALSE   0.01 './RInputFiles/metar_kord_2016.txt'
## .... ................. .................. ...... ...................................
## See problems(...) for more details.
## Classes 'spec_tbl_df', 'tbl_df', 'tbl' and 'data.frame': 10809 obs. of  29 variables:
##  $ station          : chr  "ORD" "ORD" "ORD" "ORD" ...
##  $ valid            : POSIXct, format: "2015-12-31 00:51:00" "2015-12-31 01:51:00" ...
##  $ tmpf             : num  27 27 27 27 27 ...
##  $ dwpf             : num  21.9 21.9 21 21 21 ...
##  $ relh             : num  81 81 78 78 78 ...
##  $ drct             : num  260 260 250 240 250 250 270 260 250 250 ...
##  $ sknt             : num  8 8 8 9 6 7 10 11 8 10 ...
##  $ p01i             : chr  "0.00" "0.00" "0.00" "0.00" ...
##  $ alti             : num  30.2 30.2 30.2 30.2 30.2 ...
##  $ mslp             : num  1024 1024 1024 1024 1024 ...
##  $ vsby             : num  8 9 10 10 10 10 10 10 10 10 ...
##  $ gust             : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ skyc1            : chr  "OVC" "OVC" "OVC" "OVC" ...
##  $ skyc2            : chr  NA NA NA NA ...
##  $ skyc3            : chr  NA NA NA NA ...
##  $ skyc4            : chr  NA NA NA NA ...
##  $ skyl1            : num  1600 1600 1500 1900 1700 1700 1600 1500 2000 1600 ...
##  $ skyl2            : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ skyl3            : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ skyl4            : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ wxcodes          : chr  NA NA NA NA ...
##  $ ice_accretion_1hr: logi  NA NA NA NA NA NA ...
##  $ ice_accretion_3hr: logi  NA NA NA NA NA NA ...
##  $ ice_accretion_6hr: logi  NA NA NA NA NA NA ...
##  $ peak_wind_gust   : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ peak_wind_drct   : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ peak_wind_time   : POSIXct, format: NA NA ...
##  $ feel             : num  17.9 17.9 17.9 17.2 19.5 ...
##  $ metar            : chr  "KORD 310051Z 26008KT 8SM OVC016 M03/M06 A3021 RMK AO2 SLP239 T10281056" "KORD 310151Z 26008KT 9SM OVC016 M03/M06 A3021 RMK AO2 SLP240 T10281056" "KORD 310251Z 25008KT 10SM OVC015 M03/M06 A3021 RMK AO2 SLP239 T10281061 50004" "KORD 310351Z 24009KT 10SM OVC019 M03/M06 A3022 RMK AO2 SLP242 T10281061" ...
## 
## *** OBSERVATIONS EXPECTED BUT NOT RECORDED ***
##  [1] "2016-04-09 03:51:00 UTC" "2016-06-11 20:51:00 UTC"
##  [3] "2016-06-17 23:51:00 UTC" "2016-06-18 00:51:00 UTC"
##  [5] "2016-06-30 15:51:00 UTC" "2016-07-13 14:51:00 UTC"
##  [7] "2016-07-13 15:51:00 UTC" "2016-07-13 16:51:00 UTC"
##  [9] "2016-07-13 17:51:00 UTC" "2016-07-30 01:51:00 UTC"
## [11] "2016-08-05 07:51:00 UTC" "2016-08-07 14:51:00 UTC"
## [13] "2016-11-21 00:51:00 UTC" "2016-11-23 09:51:00 UTC"
## [15] "2016-11-27 18:51:00 UTC" "2016-12-01 11:51:00 UTC"
## [17] "2016-12-05 18:51:00 UTC"
## 
## *** OBSERVATIONS RECORDED BUT NOT EXPECTED ***
## POSIXct of length 0
## 
## *** Are the extracted records unique? ***
## [1] TRUE
## 
## 
## *** First 6 winds and parsing ***
##      [,1]      [,2]  [,3] [,4]
## [1,] "26008KT" "260" "08" NA  
## [2,] "26008KT" "260" "08" NA  
## [3,] "25008KT" "250" "08" NA  
## [4,] "24009KT" "240" "09" NA  
## [5,] "25006KT" "250" "06" NA  
## [6,] "25007KT" "250" "07" NA  
## 
## *** Table of WIND DIRECTION ***
## 
##  000  010  020  030  040  050  060  070  080  090  100  110  120  130  140  150 
##  512  183  170  247  241  210  181  170  134  197  142   77   74  116  168  157 
##  160  170  180  190  200  210  220  230  240  250  260  270  280  290  300  310 
##  181  269  307  333  365  337  219  280  318  344  369  342  330  282  240  196 
##  320  330  340  350  360  VRB <NA> 
##  206  216  196  197  171  137    1 
## 
## *** Table of WIND SPEED ***
## 
##   00   03   04   05   06   07   08   09   10   11   12   13   14   15   16   17 
##  512  531  635  741  821  817  775  775  624  519  473  424  316  233  172  127 
##   18   19   20   21   22   23   24   25   26   27   28   29   30   36   37 <NA> 
##   93   64   53   35   22   17   15    6    5    1    4    1    1    1    1    1 
## 
## *** Table of WIND GUST ***
## 
##  G14  G15  G16  G17  G18  G19  G20  G21  G22  G23  G24  G25  G26  G27  G28  G29 
##    5   11   40   44   80  119  109  132  128   97   88   84   71   46   36   30 
##  G30  G31  G32  G33  G34  G35  G36  G37  G38  G39  G40  G41  G42  G43  G44  G45 
##   27   18   19   15    9    5    3    3    3    2    3    1    1    1    2    2 
##  G46  G52  G54 <NA> 
##    2    1    1 7577 
## 
##  *** WIND DATA WAS NOT CAPTURED FROM: *** 
## # A tibble: 1 x 1
##   metar                                                                    
##   <chr>                                                                    
## 1 KORD 061851Z 19012G19 10SM BKN047 BKN250 32/23 A3003 RMK SLP163 T03220228

## Warning: Removed 1 rows containing non-finite values (stat_count).

## Warning: Removed 2 rows containing missing values (geom_point).
## 
## *** Tentative Summary of Element Parsing *** 
## .
## FALSE  TRUE 
##    10  8805 
## 
## *** Data Not Matched *** 
##  [1] "KORD 100151Z 34014KT 4SM -SN BR BKN008 BKN016 OVC022 00/M01 A2976 RMK T00001011"
##  [2] "KORD 100251Z 34016KT 3SM -SN BR OVC011 M01/M02 A2975 RMK T10111022"             
##  [3] "KORD 071551Z 00000KT 10SM SCT030 SCT060 BKN250 27/15 A3007"                     
##  [4] "KORD 071651Z 13005KT 10SM FEW030 BKN060 BKN250 28/17 A3007"                     
##  [5] "KORD 071751Z 10005KT 10SM SCT048 BKN220 BKN250 30/16 3006"                      
##  [6] "KORD 071851Z 10009KT 10SM SCT050 BKN230 BKN250 31/14 A3005"                     
##  [7] "KORD 061851Z 19012G19 10SM BKN047 BKN250 32/23 A3003 RMK SLP163 T03220228"      
##  [8] "KORD 261151Z 00000KT 10SM OVC029 02/M01 A3034"                                  
##  [9] "KORD 280351Z 18012KT 10SM BKN075 OVC100 06/02 A2976"                            
## [10] "KORD 210451Z 25009KT 10SM BKN250 M04/M08 A3014"                                 
## 
## *** Parsing matrix summary *** 
## [1] 8815   11
##      [,1]                                                                  
## [1,] "51Z 26008KT 8SM OVC016 M03/M06 A3021 RMK AO2 SLP239 T10281056"       
## [2,] "51Z 26008KT 9SM OVC016 M03/M06 A3021 RMK AO2 SLP240 T10281056"       
## [3,] "51Z 25008KT 10SM OVC015 M03/M06 A3021 RMK AO2 SLP239 T10281061"      
## [4,] "51Z 24009KT 10SM OVC019 M03/M06 A3022 RMK AO2 SLP242 T10281061"      
## [5,] "51Z 25006KT 10SM OVC017 M03/M06 A3021 RMK AO2 SLP239 T10281061"      
## [6,] "51Z 25007KT 10SM OVC017 M03/M06 A3020 RMK AO2 SLP237 4/002 T10281061"
##      [,2]  [,3] [,4] [,5] [,6]   [,7]  [,8]  [,9]    [,10]    [,11]      
## [1,] "260" "08" NA   " "  "8SM"  "M03" "M06" "A3021" "SLP239" "T10281056"
## [2,] "260" "08" NA   " "  "9SM"  "M03" "M06" "A3021" "SLP240" "T10281056"
## [3,] "250" "08" NA   " "  "10SM" "M03" "M06" "A3021" "SLP239" "T10281061"
## [4,] "240" "09" NA   " "  "10SM" "M03" "M06" "A3022" "SLP242" "T10281061"
## [5,] "250" "06" NA   " "  "10SM" "M03" "M06" "A3021" "SLP239" "T10281061"
## [6,] "250" "07" NA   " "  "10SM" "M03" "M06" "A3020" "SLP237" "T10281061"
## 
## *** Summary of the parsed data *** 
## Observations: 8,815
## Variables: 13
## $ METAR      <chr> "51Z 26008KT 8SM OVC016 M03/M06 A3021 RMK AO2 SLP239 T10...
## $ WindDir    <chr> "260", "260", "250", "240", "250", "250", "270", "260", ...
## $ WindSpeed  <chr> "08", "08", "08", "09", "06", "07", "10", "11", "08", "1...
## $ WindGust   <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, ...
## $ Dummy      <chr> " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", "...
## $ Visibility <chr> "8SM", "9SM", "10SM", "10SM", "10SM", "10SM", "10SM", "1...
## $ TempC      <chr> "M03", "M03", "M03", "M03", "M03", "M03", "M03", "M03", ...
## $ DewC       <chr> "M06", "M06", "M06", "M06", "M06", "M06", "M06", "M06", ...
## $ Altimeter  <chr> "A3021", "A3021", "A3021", "A3022", "A3021", "A3020", "A...
## $ SLP        <chr> "SLP239", "SLP240", "SLP239", "SLP242", "SLP239", "SLP23...
## $ FahrC      <chr> "T10281056", "T10281056", "T10281061", "T10281061", "T10...
## $ dtime      <dttm> 2015-12-31 00:51:00, 2015-12-31 01:51:00, 2015-12-31 02...
## $ origMETAR  <chr> "KORD 310051Z 26008KT 8SM OVC016 M03/M06 A3021 RMK AO2 S...
## Warning: NAs introduced by coercion

## 
##  *** Parsed data structure, head, tail, and random sample *** 
## Classes 'tbl_df', 'tbl' and 'data.frame':    8815 obs. of  15 variables:
##  $ METAR     : chr  "51Z 26008KT 8SM OVC016 M03/M06 A3021 RMK AO2 SLP239 T10281056" "51Z 26008KT 9SM OVC016 M03/M06 A3021 RMK AO2 SLP240 T10281056" "51Z 25008KT 10SM OVC015 M03/M06 A3021 RMK AO2 SLP239 T10281061" "51Z 24009KT 10SM OVC019 M03/M06 A3022 RMK AO2 SLP242 T10281061" ...
##  $ WindDir   : chr  "260" "260" "250" "240" ...
##  $ WindSpeed : int  8 8 8 9 6 7 10 11 8 10 ...
##  $ WindGust  : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ Dummy     : chr  " " " " " " " " ...
##  $ Visibility: num  8 9 10 10 10 10 10 10 10 10 ...
##  $ TempC     : int  -3 -3 -3 -3 -3 -3 -3 -3 -3 -3 ...
##  $ DewC      : int  -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 ...
##  $ Altimeter : int  3021 3021 3021 3022 3021 3020 3021 3022 3022 3021 ...
##  $ SLP       : int  239 240 239 242 239 237 237 242 244 239 ...
##  $ FahrC     : chr  "T10281056" "T10281056" "T10281061" "T10281061" ...
##  $ dtime     : POSIXct, format: "2015-12-31 00:51:00" "2015-12-31 01:51:00" ...
##  $ origMETAR : chr  "KORD 310051Z 26008KT 8SM OVC016 M03/M06 A3021 RMK AO2 SLP239 T10281056" "KORD 310151Z 26008KT 9SM OVC016 M03/M06 A3021 RMK AO2 SLP240 T10281056" "KORD 310251Z 25008KT 10SM OVC015 M03/M06 A3021 RMK AO2 SLP239 T10281061 50004" "KORD 310351Z 24009KT 10SM OVC019 M03/M06 A3022 RMK AO2 SLP242 T10281061" ...
##  $ TempF     : num  27 27 27 27 27 ...
##  $ DewF      : num  21.9 21.9 21 21 21 ...
## # A tibble: 6 x 15
##   METAR WindDir WindSpeed WindGust Dummy Visibility TempC  DewC Altimeter   SLP
##   <chr> <chr>       <int>    <dbl> <chr>      <dbl> <int> <int>     <int> <int>
## 1 51Z ~ 260             8       NA " "            8    -3    -6      3021   239
## 2 51Z ~ 260             8       NA " "            9    -3    -6      3021   240
## 3 51Z ~ 250             8       NA " "           10    -3    -6      3021   239
## 4 51Z ~ 240             9       NA " "           10    -3    -6      3022   242
## 5 51Z ~ 250             6       NA " "           10    -3    -6      3021   239
## 6 51Z ~ 250             7       NA " "           10    -3    -6      3020   237
## # ... with 5 more variables: FahrC <chr>, dtime <dttm>, origMETAR <chr>,
## #   TempF <dbl>, DewF <dbl>
## # A tibble: 6 x 15
##   METAR WindDir WindSpeed WindGust Dummy Visibility TempC  DewC Altimeter   SLP
##   <chr> <chr>       <int>    <dbl> <chr>      <dbl> <int> <int>     <int> <int>
## 1 51Z ~ 200             8       NA " "           10     3    -7      3010   202
## 2 51Z ~ 230             8       NA " "           10     3    -7      3011   204
## 3 51Z ~ 200             6       NA " "           10     4    -7      3011   203
## 4 51Z ~ 000             0       NA " "           10     3    -8      3009   199
## 5 51Z ~ 000             0       NA " "           10     1    -7      3012   208
## 6 51Z ~ 000             0       NA " "           10     1    -7      3014   217
## # ... with 5 more variables: FahrC <chr>, dtime <dttm>, origMETAR <chr>,
## #   TempF <dbl>, DewF <dbl>
## # A tibble: 20 x 15
##    METAR WindDir WindSpeed WindGust Dummy Visibility TempC  DewC Altimeter   SLP
##    <chr> <chr>       <int>    <dbl> <chr>      <dbl> <int> <int>     <int> <int>
##  1 51Z ~ 090             9       NA " "           10    21    19      2982    93
##  2 51Z ~ 230             7       NA " "           10    27     9      2982    97
##  3 51Z ~ 260            12       NA " "           10    -4    -8      3001   169
##  4 51Z ~ 180             8       NA " "           10    16    11      2979    83
##  5 51Z ~ 260            18       NA " "           10    -9   -15      3011   211
##  6 51Z ~ 040             7       NA " "            4    20    19      2987   111
##  7 51Z ~ 200            12       NA " "           10    26    16      3018   216
##  8 51Z ~ 250             4       NA " "           10    -3    -7      3014   215
##  9 51Z ~ 260             6       NA " "           10    19    17      2983    96
## 10 51Z ~ 140             8       NA " "           10    19    14      3009   188
## 11 51Z ~ 040             4       NA " "           10    18    14      3024   236
## 12 51Z ~ 050             8       NA " "           10    16     7      3020   227
## 13 51Z ~ 290            10       NA " "           10     1    -4      3029   263
## 14 51Z ~ 340             6       NA " "            8    15    14      2999   156
## 15 51Z ~ 290            10       NA " "           10   -18   -23      3036   298
## 16 51Z ~ 260             8       NA " "           10    18    16      2964    33
## 17 51Z ~ 030             8       NA " "           10     7     3      3003   172
## 18 51Z ~ 130             8       NA " 1 ~          2     3     3      2981   100
## 19 51Z ~ 260             7       NA " "           10    -3    -7      3034   281
## 20 51Z ~ 170             8       NA " "           10    21    14      2987   110
## # ... with 5 more variables: FahrC <chr>, dtime <dttm>, origMETAR <chr>,
## #   TempF <dbl>, DewF <dbl>
## 
##  *** Number of NA values *** 
##      METAR    WindDir  WindSpeed   WindGust      Dummy Visibility      TempC 
##         10         10         10       8815         10         10         10 
##       DewC  Altimeter        SLP      FahrC      dtime  origMETAR      TempF 
##         10         10         10         10          0          0         10 
##       DewF 
##         10

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf
## Warning in min(diff(sort(x))): no non-missing arguments to min; returning Inf
## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## # A tibble: 20 x 2
##    Visibility     n
##         <dbl> <int>
##  1      0         3
##  2      0.125     8
##  3      0.25     22
##  4      0.5      21
##  5      0.75     19
##  6      1        27
##  7      1.25     22
##  8      1.5      34
##  9      1.75     30
## 10      2        64
## 11      2.5      54
## 12      3        99
## 13      4       108
## 14      5       168
## 15      6       169
## 16      7       179
## 17      8       184
## 18      9       284
## 19     10      7310
## 20     NA        10
##    WindGust    n
## 1        14    5
## 2        15   10
## 3        16   39
## 4        17   44
## 5        18   80
## 6        19  119
## 7        20  109
## 8        21  132
## 9        22  127
## 10       23   97
## 11       24   88
## 12       25   84
## 13       26   71
## 14       27   46
## 15       28   36
## 16       29   30
## 17       30   27
## 18       31   18
## 19       32   19
## 20       33   15
## 21       34    9
## 22       35    5
## 23       36    3
## 24       37    3
## 25       38    3
## 26       39    2
## 27       40    3
## 28       41    1
## 29       42    1
## 30       43    1
## 31       44    2
## 32       45    2
## 33       46    2
## 34       52    1
## 35       54    1
## 36       NA 7580
## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## 
##  *** Correlations use 8805 complete cases (99.9% of 8815 total) ***
##            TempC TempF  DewC  DewF Altimeter modSLP WindSpeed Visibility
## TempC       1.00  1.00  0.93  0.93     -0.22  -0.29     -0.12       0.19
## TempF       1.00  1.00  0.93  0.93     -0.22  -0.29     -0.12       0.19
## DewC        0.93  0.93  1.00  1.00     -0.27  -0.34     -0.19       0.05
## DewF        0.93  0.93  1.00  1.00     -0.28  -0.34     -0.19       0.05
## Altimeter  -0.22 -0.22 -0.27 -0.28      1.00   1.00     -0.31       0.19
## modSLP     -0.29 -0.29 -0.34 -0.34      1.00   1.00     -0.29       0.17
## WindSpeed  -0.12 -0.12 -0.19 -0.19     -0.31  -0.29      1.00       0.01
## Visibility  0.19  0.19  0.05  0.05      0.19   0.17      0.01       1.00

## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

## 
##  *** Regression call is: modSLP ~ Altimeter ***
## 
## Call:
## lm(formula = formula(myChar), data = met)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.31041 -0.47561 -0.07852  0.43888  1.66927 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -2.312e+01  8.598e-01  -26.89   <2e-16 ***
## Altimeter    3.464e-01  2.864e-04 1209.37   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5573 on 8803 degrees of freedom
##   (10 observations deleted due to missingness)
## Multiple R-squared:  0.994,  Adjusted R-squared:  0.994 
## F-statistic: 1.463e+06 on 1 and 8803 DF,  p-value: < 2.2e-16
## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

## 
##  *** Regression call is: modSLP ~ Altimeter + TempF ***
## 
## Call:
## lm(formula = formula(myChar), data = met)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.69460 -0.12269  0.00055  0.12142  0.73628 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -4.603e+00  2.864e-01  -16.07   <2e-16 ***
## Altimeter    3.407e-01  9.502e-05 3585.30   <2e-16 ***
## TempF       -2.606e-02  9.503e-05 -274.26   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1804 on 8802 degrees of freedom
##   (10 observations deleted due to missingness)
## Multiple R-squared:  0.9994, Adjusted R-squared:  0.9994 
## F-statistic: 7.018e+06 on 2 and 8802 DF,  p-value: < 2.2e-16
## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

## 
## *** Counts by number of layers of each cloud type ***
##    isCLR isVV numFEW numSCT numBKN numOVC    n
## 1      0    0      0      0      0      0    1
## 2      0    0      0      0      0      1 1404
## 3      0    0      0      0      1      0  515
## 4      0    0      0      0      1      1  551
## 5      0    0      0      0      2      0  236
## 6      0    0      0      0      2      1  142
## 7      0    0      0      0      3      0   49
## 8      0    0      0      0      3      1    9
## 9      0    0      0      0      4      0    4
## 10     0    0      0      1      0      0  381
## 11     0    0      0      1      0      1  144
## 12     0    0      0      1      1      0  234
## 13     0    0      0      1      1      1  193
## 14     0    0      0      1      2      0  114
## 15     0    0      0      1      2      1   21
## 16     0    0      0      1      3      0   21
## 17     0    0      0      1      3      1    1
## 18     0    0      0      2      0      0  173
## 19     0    0      0      2      0      1   16
## 20     0    0      0      2      1      0   61
## 21     0    0      0      2      1      1   14
## 22     0    0      0      2      2      0   14
## 23     0    0      0      3      0      0   17
## 24     0    0      1      0      0      0 1050
## 25     0    0      1      0      0      1  240
## 26     0    0      1      0      1      0  359
## 27     0    0      1      0      1      1  217
## 28     0    0      1      0      2      0  130
## 29     0    0      1      0      2      1   19
## 30     0    0      1      0      3      0   14
## 31     0    0      1      1      0      0  323
## 32     0    0      1      1      0      1  105
## 33     0    0      1      1      1      0  226
## 34     0    0      1      1      1      1   39
## 35     0    0      1      1      2      0   54
## 36     0    0      1      2      0      0   65
## 37     0    0      1      2      0      1    3
## 38     0    0      1      2      1      0   17
## 39     0    0      1      2      1      1    1
## 40     0    0      1      2      2      0    2
## 41     0    0      1      3      0      0    2
## 42     0    0      2      0      0      0  431
## 43     0    0      2      0      0      1   37
## 44     0    0      2      0      1      0  109
## 45     0    0      2      0      1      1   17
## 46     0    0      2      0      2      0    9
## 47     0    0      2      1      0      0   96
## 48     0    0      2      1      0      1    6
## 49     0    0      2      1      1      0   42
## 50     0    0      2      1      1      1    4
## 51     0    0      2      1      2      0    1
## 52     0    0      2      2      0      0    9
## 53     0    0      3      0      0      0    5
## 54     0    0      4      0      0      0    1
## 55     0    1      0      0      0      0   55
## 56     1    0      0      0      0      0  812
## 
## *** METAR records where no clouds were extracted ***
## [1] "KORD 071951Z AUTO 07005KT 10SM 28/14 A3004 RMK AO2 SLP169 T02780139 $"

## 
## *** Dimensions for the cloud matrix ***
## [1] 8815   14
## List of 2
##  $ tblClouds:Classes 'tbl_df', 'tbl' and 'data.frame':   8815 obs. of  7 variables:
##   ..$ isCLR : num [1:8815] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ isVV  : num [1:8815] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ htVV  : num [1:8815] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ numFEW: int [1:8815] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ numSCT: int [1:8815] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ numBKN: int [1:8815] 0 0 0 0 0 0 0 0 0 1 ...
##   ..$ numOVC: int [1:8815] 1 1 1 1 1 1 1 1 1 0 ...
##  $ mtxCloud : chr [1:8815, 1:14] "" "" "" "" ...
## 
## *** Lowest clouds by type tibble ***
## # A tibble: 8,815 x 5
##    lowVV lowOVC lowBKN lowSCT lowFEW
##    <dbl>  <dbl>  <dbl>  <dbl>  <dbl>
##  1    NA   1600     NA     NA     NA
##  2    NA   1600     NA     NA     NA
##  3    NA   1500     NA     NA     NA
##  4    NA   1900     NA     NA     NA
##  5    NA   1700     NA     NA     NA
##  6    NA   1700     NA     NA     NA
##  7    NA   1600     NA     NA     NA
##  8    NA   1500     NA     NA     NA
##  9    NA   2000     NA     NA     NA
## 10    NA     NA   1600     NA     NA
## # ... with 8,805 more rows

## List of 3
##  $ lowCloud       :Classes 'tbl_df', 'tbl' and 'data.frame': 8815 obs. of  5 variables:
##   ..$ lowVV : num [1:8815] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ lowOVC: num [1:8815] 1600 1600 1500 1900 1700 1700 1600 1500 2000 NA ...
##   ..$ lowBKN: num [1:8815] NA NA NA NA NA NA NA NA NA 1600 ...
##   ..$ lowSCT: num [1:8815] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ lowFEW: num [1:8815] NA NA NA NA NA NA NA NA NA NA ...
##  $ minCeilingLevel: num [1:8815] 1600 1600 1500 1900 1700 1700 1600 1500 2000 1600 ...
##  $ minCloudLevel  : num [1:8815] 1600 1600 1500 1900 1700 1700 1600 1500 2000 1600 ...
## Classes 'tbl_df', 'tbl' and 'data.frame':    8815 obs. of  30 variables:
##  $ METAR     : chr  "51Z 26008KT 8SM OVC016 M03/M06 A3021 RMK AO2 SLP239 T10281056" "51Z 26008KT 9SM OVC016 M03/M06 A3021 RMK AO2 SLP240 T10281056" "51Z 25008KT 10SM OVC015 M03/M06 A3021 RMK AO2 SLP239 T10281061" "51Z 24009KT 10SM OVC019 M03/M06 A3022 RMK AO2 SLP242 T10281061" ...
##  $ WindDir   : chr  "260" "260" "250" "240" ...
##  $ WindSpeed : int  8 8 8 9 6 7 10 11 8 10 ...
##  $ WindGust  : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ Dummy     : chr  " " " " " " " " ...
##  $ Visibility: num  8 9 10 10 10 10 10 10 10 10 ...
##  $ TempC     : int  -3 -3 -3 -3 -3 -3 -3 -3 -3 -3 ...
##  $ DewC      : int  -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 ...
##  $ Altimeter : int  3021 3021 3021 3022 3021 3020 3021 3022 3022 3021 ...
##  $ SLP       : int  239 240 239 242 239 237 237 242 244 239 ...
##  $ FahrC     : chr  "T10281056" "T10281056" "T10281061" "T10281061" ...
##  $ dtime     : POSIXct, format: "2015-12-31 00:51:00" "2015-12-31 01:51:00" ...
##  $ origMETAR : chr  "KORD 310051Z 26008KT 8SM OVC016 M03/M06 A3021 RMK AO2 SLP239 T10281056" "KORD 310151Z 26008KT 9SM OVC016 M03/M06 A3021 RMK AO2 SLP240 T10281056" "KORD 310251Z 25008KT 10SM OVC015 M03/M06 A3021 RMK AO2 SLP239 T10281061 50004" "KORD 310351Z 24009KT 10SM OVC019 M03/M06 A3022 RMK AO2 SLP242 T10281061" ...
##  $ TempF     : num  27 27 27 27 27 ...
##  $ DewF      : num  21.9 21.9 21 21 21 ...
##  $ modSLP    : num  1024 1024 1024 1024 1024 ...
##  $ isCLR     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ isVV      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ htVV      : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ numFEW    : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ numSCT    : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ numBKN    : int  0 0 0 0 0 0 0 0 0 1 ...
##  $ numOVC    : int  1 1 1 1 1 1 1 1 1 0 ...
##  $ lowVV     : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ lowOVC    : num  1600 1600 1500 1900 1700 1700 1600 1500 2000 NA ...
##  $ lowBKN    : num  NA NA NA NA NA NA NA NA NA 1600 ...
##  $ lowSCT    : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ lowFEW    : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ wType     : Factor w/ 7 levels "VV","OVC","BKN",..: 2 2 2 2 2 2 2 2 2 3 ...
##  $ month     : Factor w/ 12 levels "Jan","Feb","Mar",..: 12 12 12 12 12 12 12 12 12 12 ...

str(kord2016METAR)
## List of 8
##  $ fullMETAR      :Classes 'tbl_df', 'tbl' and 'data.frame': 8815 obs. of  30 variables:
##   ..$ METAR     : chr [1:8815] "51Z 26008KT 8SM OVC016 M03/M06 A3021 RMK AO2 SLP239 T10281056" "51Z 26008KT 9SM OVC016 M03/M06 A3021 RMK AO2 SLP240 T10281056" "51Z 25008KT 10SM OVC015 M03/M06 A3021 RMK AO2 SLP239 T10281061" "51Z 24009KT 10SM OVC019 M03/M06 A3022 RMK AO2 SLP242 T10281061" ...
##   ..$ WindDir   : chr [1:8815] "260" "260" "250" "240" ...
##   ..$ WindSpeed : int [1:8815] 8 8 8 9 6 7 10 11 8 10 ...
##   ..$ WindGust  : num [1:8815] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ Dummy     : chr [1:8815] " " " " " " " " ...
##   ..$ Visibility: num [1:8815] 8 9 10 10 10 10 10 10 10 10 ...
##   ..$ TempC     : int [1:8815] -3 -3 -3 -3 -3 -3 -3 -3 -3 -3 ...
##   ..$ DewC      : int [1:8815] -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 ...
##   ..$ Altimeter : int [1:8815] 3021 3021 3021 3022 3021 3020 3021 3022 3022 3021 ...
##   ..$ SLP       : int [1:8815] 239 240 239 242 239 237 237 242 244 239 ...
##   ..$ FahrC     : chr [1:8815] "T10281056" "T10281056" "T10281061" "T10281061" ...
##   ..$ dtime     : POSIXct[1:8815], format: "2015-12-31 00:51:00" "2015-12-31 01:51:00" ...
##   ..$ origMETAR : chr [1:8815] "KORD 310051Z 26008KT 8SM OVC016 M03/M06 A3021 RMK AO2 SLP239 T10281056" "KORD 310151Z 26008KT 9SM OVC016 M03/M06 A3021 RMK AO2 SLP240 T10281056" "KORD 310251Z 25008KT 10SM OVC015 M03/M06 A3021 RMK AO2 SLP239 T10281061 50004" "KORD 310351Z 24009KT 10SM OVC019 M03/M06 A3022 RMK AO2 SLP242 T10281061" ...
##   ..$ TempF     : num [1:8815] 27 27 27 27 27 ...
##   ..$ DewF      : num [1:8815] 21.9 21.9 21 21 21 ...
##   ..$ modSLP    : num [1:8815] 1024 1024 1024 1024 1024 ...
##   ..$ isCLR     : num [1:8815] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ isVV      : num [1:8815] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ htVV      : num [1:8815] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ numFEW    : int [1:8815] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ numSCT    : int [1:8815] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ numBKN    : int [1:8815] 0 0 0 0 0 0 0 0 0 1 ...
##   ..$ numOVC    : int [1:8815] 1 1 1 1 1 1 1 1 1 0 ...
##   ..$ lowVV     : num [1:8815] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ lowOVC    : num [1:8815] 1600 1600 1500 1900 1700 1700 1600 1500 2000 NA ...
##   ..$ lowBKN    : num [1:8815] NA NA NA NA NA NA NA NA NA 1600 ...
##   ..$ lowSCT    : num [1:8815] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ lowFEW    : num [1:8815] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ wType     : Factor w/ 7 levels "VV","OVC","BKN",..: 2 2 2 2 2 2 2 2 2 3 ...
##   ..$ month     : Factor w/ 12 levels "Jan","Feb","Mar",..: 12 12 12 12 12 12 12 12 12 12 ...
##  $ funcMETAR      :Classes 'tbl_df', 'tbl' and 'data.frame': 8815 obs. of  29 variables:
##   ..$ station          : chr [1:8815] "ORD" "ORD" "ORD" "ORD" ...
##   ..$ valid            : POSIXct[1:8815], format: "2015-12-31 00:51:00" "2015-12-31 01:51:00" ...
##   ..$ tmpf             : num [1:8815] 27 27 27 27 27 ...
##   ..$ dwpf             : num [1:8815] 21.9 21.9 21 21 21 ...
##   ..$ relh             : num [1:8815] 81 81 78 78 78 ...
##   ..$ drct             : num [1:8815] 260 260 250 240 250 250 270 260 250 250 ...
##   ..$ sknt             : num [1:8815] 8 8 8 9 6 7 10 11 8 10 ...
##   ..$ p01i             : chr [1:8815] "0.00" "0.00" "0.00" "0.00" ...
##   ..$ alti             : num [1:8815] 30.2 30.2 30.2 30.2 30.2 ...
##   ..$ mslp             : num [1:8815] 1024 1024 1024 1024 1024 ...
##   ..$ vsby             : num [1:8815] 8 9 10 10 10 10 10 10 10 10 ...
##   ..$ gust             : num [1:8815] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ skyc1            : chr [1:8815] "OVC" "OVC" "OVC" "OVC" ...
##   ..$ skyc2            : chr [1:8815] NA NA NA NA ...
##   ..$ skyc3            : chr [1:8815] NA NA NA NA ...
##   ..$ skyc4            : chr [1:8815] NA NA NA NA ...
##   ..$ skyl1            : num [1:8815] 1600 1600 1500 1900 1700 1700 1600 1500 2000 1600 ...
##   ..$ skyl2            : num [1:8815] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ skyl3            : num [1:8815] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ skyl4            : num [1:8815] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ wxcodes          : chr [1:8815] NA NA NA NA ...
##   ..$ ice_accretion_1hr: logi [1:8815] NA NA NA NA NA NA ...
##   ..$ ice_accretion_3hr: logi [1:8815] NA NA NA NA NA NA ...
##   ..$ ice_accretion_6hr: logi [1:8815] NA NA NA NA NA NA ...
##   ..$ peak_wind_gust   : num [1:8815] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ peak_wind_drct   : num [1:8815] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ peak_wind_time   : POSIXct[1:8815], format: NA NA ...
##   ..$ feel             : num [1:8815] 17.9 17.9 17.9 17.2 19.5 ...
##   ..$ metar            : chr [1:8815] "KORD 310051Z 26008KT 8SM OVC016 M03/M06 A3021 RMK AO2 SLP239 T10281056" "KORD 310151Z 26008KT 9SM OVC016 M03/M06 A3021 RMK AO2 SLP240 T10281056" "KORD 310251Z 25008KT 10SM OVC015 M03/M06 A3021 RMK AO2 SLP239 T10281061 50004" "KORD 310351Z 24009KT 10SM OVC019 M03/M06 A3022 RMK AO2 SLP242 T10281061" ...
##   ..- attr(*, "problems")=Classes 'tbl_df', 'tbl' and 'data.frame':  22 obs. of  5 variables:
##   .. ..$ row     : int [1:22] 1678 1680 1902 1903 1903 2919 2921 2922 2929 2934 ...
##   .. ..$ col     : chr [1:22] "ice_accretion_1hr" "ice_accretion_6hr" "ice_accretion_1hr" "ice_accretion_1hr" ...
##   .. ..$ expected: chr [1:22] "1/0/T/F/TRUE/FALSE" "1/0/T/F/TRUE/FALSE" "1/0/T/F/TRUE/FALSE" "1/0/T/F/TRUE/FALSE" ...
##   .. ..$ actual  : chr [1:22] "0.01" "0.01" "0.01" "0.01" ...
##   .. ..$ file    : chr [1:22] "'./RInputFiles/metar_kord_2016.txt'" "'./RInputFiles/metar_kord_2016.txt'" "'./RInputFiles/metar_kord_2016.txt'" "'./RInputFiles/metar_kord_2016.txt'" ...
##   ..- attr(*, "spec")=
##   .. .. cols(
##   .. ..   station = col_character(),
##   .. ..   valid = col_datetime(format = ""),
##   .. ..   tmpf = col_double(),
##   .. ..   dwpf = col_double(),
##   .. ..   relh = col_double(),
##   .. ..   drct = col_double(),
##   .. ..   sknt = col_double(),
##   .. ..   p01i = col_character(),
##   .. ..   alti = col_double(),
##   .. ..   mslp = col_double(),
##   .. ..   vsby = col_double(),
##   .. ..   gust = col_double(),
##   .. ..   skyc1 = col_character(),
##   .. ..   skyc2 = col_character(),
##   .. ..   skyc3 = col_character(),
##   .. ..   skyc4 = col_character(),
##   .. ..   skyl1 = col_double(),
##   .. ..   skyl2 = col_double(),
##   .. ..   skyl3 = col_double(),
##   .. ..   skyl4 = col_double(),
##   .. ..   wxcodes = col_character(),
##   .. ..   ice_accretion_1hr = col_logical(),
##   .. ..   ice_accretion_3hr = col_logical(),
##   .. ..   ice_accretion_6hr = col_logical(),
##   .. ..   peak_wind_gust = col_double(),
##   .. ..   peak_wind_drct = col_double(),
##   .. ..   peak_wind_time = col_datetime(format = ""),
##   .. ..   feel = col_double(),
##   .. ..   metar = col_character()
##   .. .. )
##  $ windMETAR      :Classes 'tbl_df', 'tbl' and 'data.frame': 8815 obs. of  32 variables:
##   ..$ station          : chr [1:8815] "ORD" "ORD" "ORD" "ORD" ...
##   ..$ valid            : POSIXct[1:8815], format: "2015-12-31 00:51:00" "2015-12-31 01:51:00" ...
##   ..$ tmpf             : num [1:8815] 27 27 27 27 27 ...
##   ..$ dwpf             : num [1:8815] 21.9 21.9 21 21 21 ...
##   ..$ relh             : num [1:8815] 81 81 78 78 78 ...
##   ..$ drct             : num [1:8815] 260 260 250 240 250 250 270 260 250 250 ...
##   ..$ sknt             : num [1:8815] 8 8 8 9 6 7 10 11 8 10 ...
##   ..$ p01i             : chr [1:8815] "0.00" "0.00" "0.00" "0.00" ...
##   ..$ alti             : num [1:8815] 30.2 30.2 30.2 30.2 30.2 ...
##   ..$ mslp             : num [1:8815] 1024 1024 1024 1024 1024 ...
##   ..$ vsby             : num [1:8815] 8 9 10 10 10 10 10 10 10 10 ...
##   ..$ gust             : num [1:8815] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ skyc1            : chr [1:8815] "OVC" "OVC" "OVC" "OVC" ...
##   ..$ skyc2            : chr [1:8815] NA NA NA NA ...
##   ..$ skyc3            : chr [1:8815] NA NA NA NA ...
##   ..$ skyc4            : chr [1:8815] NA NA NA NA ...
##   ..$ skyl1            : num [1:8815] 1600 1600 1500 1900 1700 1700 1600 1500 2000 1600 ...
##   ..$ skyl2            : num [1:8815] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ skyl3            : num [1:8815] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ skyl4            : num [1:8815] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ wxcodes          : chr [1:8815] NA NA NA NA ...
##   ..$ ice_accretion_1hr: logi [1:8815] NA NA NA NA NA NA ...
##   ..$ ice_accretion_3hr: logi [1:8815] NA NA NA NA NA NA ...
##   ..$ ice_accretion_6hr: logi [1:8815] NA NA NA NA NA NA ...
##   ..$ peak_wind_gust   : num [1:8815] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ peak_wind_drct   : num [1:8815] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ peak_wind_time   : POSIXct[1:8815], format: NA NA ...
##   ..$ feel             : num [1:8815] 17.9 17.9 17.9 17.2 19.5 ...
##   ..$ metar            : chr [1:8815] "KORD 310051Z 26008KT 8SM OVC016 M03/M06 A3021 RMK AO2 SLP239 T10281056" "KORD 310151Z 26008KT 9SM OVC016 M03/M06 A3021 RMK AO2 SLP240 T10281056" "KORD 310251Z 25008KT 10SM OVC015 M03/M06 A3021 RMK AO2 SLP239 T10281061 50004" "KORD 310351Z 24009KT 10SM OVC019 M03/M06 A3022 RMK AO2 SLP242 T10281061" ...
##   ..$ dirW             : chr [1:8815] "260" "260" "250" "240" ...
##   ..$ spdW             : num [1:8815] 8 8 8 9 6 7 10 11 8 10 ...
##   ..$ gustW            : num [1:8815] NA NA NA NA NA NA NA NA NA NA ...
##  $ initMETAR      :Classes 'tbl_df', 'tbl' and 'data.frame': 8815 obs. of  13 variables:
##   ..$ METAR     : chr [1:8815] "51Z 26008KT 8SM OVC016 M03/M06 A3021 RMK AO2 SLP239 T10281056" "51Z 26008KT 9SM OVC016 M03/M06 A3021 RMK AO2 SLP240 T10281056" "51Z 25008KT 10SM OVC015 M03/M06 A3021 RMK AO2 SLP239 T10281061" "51Z 24009KT 10SM OVC019 M03/M06 A3022 RMK AO2 SLP242 T10281061" ...
##   ..$ WindDir   : chr [1:8815] "260" "260" "250" "240" ...
##   ..$ WindSpeed : chr [1:8815] "08" "08" "08" "09" ...
##   ..$ WindGust  : chr [1:8815] NA NA NA NA ...
##   ..$ Dummy     : chr [1:8815] " " " " " " " " ...
##   ..$ Visibility: chr [1:8815] "8SM" "9SM" "10SM" "10SM" ...
##   ..$ TempC     : chr [1:8815] "M03" "M03" "M03" "M03" ...
##   ..$ DewC      : chr [1:8815] "M06" "M06" "M06" "M06" ...
##   ..$ Altimeter : chr [1:8815] "A3021" "A3021" "A3021" "A3022" ...
##   ..$ SLP       : chr [1:8815] "SLP239" "SLP240" "SLP239" "SLP242" ...
##   ..$ FahrC     : chr [1:8815] "T10281056" "T10281056" "T10281061" "T10281061" ...
##   ..$ dtime     : POSIXct[1:8815], format: "2015-12-31 00:51:00" "2015-12-31 01:51:00" ...
##   ..$ origMETAR : chr [1:8815] "KORD 310051Z 26008KT 8SM OVC016 M03/M06 A3021 RMK AO2 SLP239 T10281056" "KORD 310151Z 26008KT 9SM OVC016 M03/M06 A3021 RMK AO2 SLP240 T10281056" "KORD 310251Z 25008KT 10SM OVC015 M03/M06 A3021 RMK AO2 SLP239 T10281061 50004" "KORD 310351Z 24009KT 10SM OVC019 M03/M06 A3022 RMK AO2 SLP242 T10281061" ...
##  $ convMETAR      :Classes 'tbl_df', 'tbl' and 'data.frame': 8815 obs. of  15 variables:
##   ..$ METAR     : chr [1:8815] "51Z 26008KT 8SM OVC016 M03/M06 A3021 RMK AO2 SLP239 T10281056" "51Z 26008KT 9SM OVC016 M03/M06 A3021 RMK AO2 SLP240 T10281056" "51Z 25008KT 10SM OVC015 M03/M06 A3021 RMK AO2 SLP239 T10281061" "51Z 24009KT 10SM OVC019 M03/M06 A3022 RMK AO2 SLP242 T10281061" ...
##   ..$ WindDir   : chr [1:8815] "260" "260" "250" "240" ...
##   ..$ WindSpeed : int [1:8815] 8 8 8 9 6 7 10 11 8 10 ...
##   ..$ WindGust  : num [1:8815] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ Dummy     : chr [1:8815] " " " " " " " " ...
##   ..$ Visibility: num [1:8815] 8 9 10 10 10 10 10 10 10 10 ...
##   ..$ TempC     : int [1:8815] -3 -3 -3 -3 -3 -3 -3 -3 -3 -3 ...
##   ..$ DewC      : int [1:8815] -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 ...
##   ..$ Altimeter : int [1:8815] 3021 3021 3021 3022 3021 3020 3021 3022 3022 3021 ...
##   ..$ SLP       : int [1:8815] 239 240 239 242 239 237 237 242 244 239 ...
##   ..$ FahrC     : chr [1:8815] "T10281056" "T10281056" "T10281061" "T10281061" ...
##   ..$ dtime     : POSIXct[1:8815], format: "2015-12-31 00:51:00" "2015-12-31 01:51:00" ...
##   ..$ origMETAR : chr [1:8815] "KORD 310051Z 26008KT 8SM OVC016 M03/M06 A3021 RMK AO2 SLP239 T10281056" "KORD 310151Z 26008KT 9SM OVC016 M03/M06 A3021 RMK AO2 SLP240 T10281056" "KORD 310251Z 25008KT 10SM OVC015 M03/M06 A3021 RMK AO2 SLP239 T10281061 50004" "KORD 310351Z 24009KT 10SM OVC019 M03/M06 A3022 RMK AO2 SLP242 T10281061" ...
##   ..$ TempF     : num [1:8815] 27 27 27 27 27 ...
##   ..$ DewF      : num [1:8815] 21.9 21.9 21 21 21 ...
##  $ parseMETAR     :Classes 'tbl_df', 'tbl' and 'data.frame': 8815 obs. of  16 variables:
##   ..$ METAR     : chr [1:8815] "51Z 26008KT 8SM OVC016 M03/M06 A3021 RMK AO2 SLP239 T10281056" "51Z 26008KT 9SM OVC016 M03/M06 A3021 RMK AO2 SLP240 T10281056" "51Z 25008KT 10SM OVC015 M03/M06 A3021 RMK AO2 SLP239 T10281061" "51Z 24009KT 10SM OVC019 M03/M06 A3022 RMK AO2 SLP242 T10281061" ...
##   ..$ WindDir   : chr [1:8815] "260" "260" "250" "240" ...
##   ..$ WindSpeed : int [1:8815] 8 8 8 9 6 7 10 11 8 10 ...
##   ..$ WindGust  : num [1:8815] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ Dummy     : chr [1:8815] " " " " " " " " ...
##   ..$ Visibility: num [1:8815] 8 9 10 10 10 10 10 10 10 10 ...
##   ..$ TempC     : int [1:8815] -3 -3 -3 -3 -3 -3 -3 -3 -3 -3 ...
##   ..$ DewC      : int [1:8815] -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 ...
##   ..$ Altimeter : int [1:8815] 3021 3021 3021 3022 3021 3020 3021 3022 3022 3021 ...
##   ..$ SLP       : int [1:8815] 239 240 239 242 239 237 237 242 244 239 ...
##   ..$ FahrC     : chr [1:8815] "T10281056" "T10281056" "T10281061" "T10281061" ...
##   ..$ dtime     : POSIXct[1:8815], format: "2015-12-31 00:51:00" "2015-12-31 01:51:00" ...
##   ..$ origMETAR : chr [1:8815] "KORD 310051Z 26008KT 8SM OVC016 M03/M06 A3021 RMK AO2 SLP239 T10281056" "KORD 310151Z 26008KT 9SM OVC016 M03/M06 A3021 RMK AO2 SLP240 T10281056" "KORD 310251Z 25008KT 10SM OVC015 M03/M06 A3021 RMK AO2 SLP239 T10281061 50004" "KORD 310351Z 24009KT 10SM OVC019 M03/M06 A3022 RMK AO2 SLP242 T10281061" ...
##   ..$ TempF     : num [1:8815] 27 27 27 27 27 ...
##   ..$ DewF      : num [1:8815] 21.9 21.9 21 21 21 ...
##   ..$ modSLP    : num [1:8815] 1024 1024 1024 1024 1024 ...
##  $ initClouds     :List of 2
##   ..$ tblClouds:Classes 'tbl_df', 'tbl' and 'data.frame':    8815 obs. of  7 variables:
##   .. ..$ isCLR : num [1:8815] 0 0 0 0 0 0 0 0 0 0 ...
##   .. ..$ isVV  : num [1:8815] 0 0 0 0 0 0 0 0 0 0 ...
##   .. ..$ htVV  : num [1:8815] NA NA NA NA NA NA NA NA NA NA ...
##   .. ..$ numFEW: int [1:8815] 0 0 0 0 0 0 0 0 0 0 ...
##   .. ..$ numSCT: int [1:8815] 0 0 0 0 0 0 0 0 0 0 ...
##   .. ..$ numBKN: int [1:8815] 0 0 0 0 0 0 0 0 0 1 ...
##   .. ..$ numOVC: int [1:8815] 1 1 1 1 1 1 1 1 1 0 ...
##   ..$ mtxCloud : chr [1:8815, 1:14] "" "" "" "" ...
##  $ processedClouds:List of 3
##   ..$ lowCloud       :Classes 'tbl_df', 'tbl' and 'data.frame':  8815 obs. of  5 variables:
##   .. ..$ lowVV : num [1:8815] NA NA NA NA NA NA NA NA NA NA ...
##   .. ..$ lowOVC: num [1:8815] 1600 1600 1500 1900 1700 1700 1600 1500 2000 NA ...
##   .. ..$ lowBKN: num [1:8815] NA NA NA NA NA NA NA NA NA 1600 ...
##   .. ..$ lowSCT: num [1:8815] NA NA NA NA NA NA NA NA NA NA ...
##   .. ..$ lowFEW: num [1:8815] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ minCeilingLevel: num [1:8815] 1600 1600 1500 1900 1700 1700 1600 1500 2000 1600 ...
##   ..$ minCloudLevel  : num [1:8815] 1600 1600 1500 1900 1700 1700 1600 1500 2000 1600 ...

Example #24: Downloading the Data by Function

The Iowa State Mesonet website provides instructions for automating the download process using Python or R, including links to a GitHub repository with instructions for R download. The code can be adapted for use here.

Example code includes:

# Function to get ASOS data from Iowa State
getASOSData <- function(faaID, startDate, endDate, suffix,
                        dirDownload = "./RInputFiles/", getAgain=FALSE) {

    # Create the file name and location for saving data
    fileName <- paste0("metar_k", str_to_lower(faaID), "_", suffix, ".txt")
    fileLoc <- paste0(dirDownload, fileName)

    # Check whether the file already exists, stop if so and getAgain is FALSE, otherwise download the data
    if (file.exists(fileLoc) & !getAgain) {
        cat("\nFile already exists - ", fileLoc)
        cat("\nStopping download routine\n")
        return(FALSE)
    } else {
        # Get the year, day, and hour of the key dates
        y1 <- lubridate::year(startDate)
        m1 <- lubridate::month(startDate)
        d1 <- lubridate::day(startDate)
    
        y2 <- lubridate::year(endDate)
        m2 <- lubridate::month(endDate)
        d2 <- lubridate::day(endDate)

        # Mimic the string shown below
        # https://mesonet.agron.iastate.edu/cgi-bin/request/asos.py?station=LAS&data=all&year1=2015&month1=12&day1=31&year2=2017&month2=1&day2=2&tz=Etc%2FUTC&format=onlycomma&latlon=no&missing=M&trace=T&direct=no&report_type=2
    
        baseURL <- "https://mesonet.agron.iastate.edu/cgi-bin/request/asos.py?"  # base URL
        useURL <- paste0(baseURL, "station=", faaID)  # add the desired station
        useURL <- paste0(useURL, "&data=all")  # request all data fields
        useURL <- paste0(useURL, "&year1=", y1, "&month1=", m1, "&day1=", d1)  # Specify start ymd
        useURL <- paste0(useURL, "&year2=", y2, "&month2=", m2, "&day2=", d2)  # Specify end ymd
        useURL <- paste0(useURL, "&tz=Etc%2FUTC&format=onlycomma&latlon=no")  # Formatting
        useURL <- paste0(useURL, "&missing=M&trace=T&direct=no&report_type=2")  # Formatting
    
        # Download the file
        download.file(useURL, destfile=fileLoc, method="curl")
        
        return(TRUE)
    }
}

And then cache the actual download step to minimize utilization of the Iowa State server (though the code is set to not download to an existing file anyway):

# Specify the FAA ID and Analysis Year
useFAAID <- "LAS"
analysisYear <- 2016

# Specify the start and end dates based on the analysis year
startDate <- ISOdate(analysisYear-1, 12, 31, hour=0)
endDate <- ISOdate(analysisYear+1, 1, 2, hour=0)

# Get the relevant data
getASOSData(faaID=useFAAID, startDate=startDate, endDate=endDate, suffix=analysisYear)
## [1] TRUE

And then explore the data to set the key parameters for a full run of the METAR process:

# Set key parameters for reading and interpreting METAR based on above inputs
fname <- paste0("./RInputFiles/metar_k", str_to_lower(useFAAID), "_", analysisYear, ".txt")

# Find the most common Zulu time (this will be the METAR)
zTimes <- readr::read_csv(fname) %>%
    pull(metar) %>%
    str_match(pattern="\\d{2}Z") %>%
    as.vector() %>%
    table() %>%
    sort(decreasing=TRUE)
## Parsed with column specification:
## cols(
##   .default = col_character(),
##   valid = col_datetime(format = ""),
##   tmpf = col_double(),
##   dwpf = col_double(),
##   relh = col_double(),
##   sknt = col_double(),
##   alti = col_double(),
##   vsby = col_double(),
##   feel = col_double()
## )
## See spec(...) for full column specifications.
## Warning: 64 parsing failures.
##  row  col expected actual                                file
## 1585 relh a double      M './RInputFiles/metar_klas_2016.txt'
## 1585 sknt a double      M './RInputFiles/metar_klas_2016.txt'
## 1585 feel a double      M './RInputFiles/metar_klas_2016.txt'
## 1869 relh a double      M './RInputFiles/metar_klas_2016.txt'
## 1869 sknt a double      M './RInputFiles/metar_klas_2016.txt'
## .... .... ........ ...... ...................................
## See problems(...) for more details.
cat("\nThe most common Zulu time is", names(zTimes)[1], "\nFrequency is",
    round(100*zTimes[1]/sum(zTimes), 1), "% (", zTimes[1], "of", sum(zTimes), ")"
    )
## 
## The most common Zulu time is 56Z 
## Frequency is 97.7 % ( 8818 of 9027 )
# Use the most common Zulu time and start/end dates to set key parameters
timeZ <- names(zTimes)[1]  # Zulu time that METAR is recorded at this station
expMin <- ISOdate(analysisYear-1, 12, 31, hour=0, min=as.integer(str_replace(timeZ, "Z", "")))
expDays <- as.integer(endDate - startDate)  # Expected total days read

# Provide a descriptive name
locMET <- "Las Vegas, NV"  # Description of city or location
shortMET <- "KLAS METAR (2016)"  # Station code and timing
longMET <- "Las Vegas, NV Hourly METAR (2016)"  # Description of city or location and timing

# Extraction format for METAR - paste the expected Zulu time at the front
valMet <- paste0(timeZ, ".*?(VRB|\\d{3})(\\d{2})(G\\d{2})?KT(.*?)(\\d{1,2}SM).*?\\s(M?\\d{2})/(M?\\d{2}).*?(A\\d{4}).*?RMK.*?(SLP\\d{3}).*?(T\\d{8})")

# Run the process for Lincoln, NE
klas2016METAR <- runAllMETAR(fname=fname, timeZ=timeZ, expMin=expMin, expDays=expDays, 
                             locMET=locMET, shortMET=shortMET, longMET=longMET, valMet=valMet
                             )
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   station = col_character(),
##   valid = col_datetime(format = ""),
##   p01i = col_character(),
##   skyc1 = col_character(),
##   skyc2 = col_character(),
##   skyc3 = col_character(),
##   skyc4 = col_character(),
##   wxcodes = col_character(),
##   ice_accretion_1hr = col_logical(),
##   ice_accretion_3hr = col_logical(),
##   ice_accretion_6hr = col_logical(),
##   peak_wind_time = col_datetime(format = ""),
##   metar = col_character()
## )
## See spec(...) for full column specifications.
## Classes 'spec_tbl_df', 'tbl_df', 'tbl' and 'data.frame': 9027 obs. of  29 variables:
##  $ station          : chr  "LAS" "LAS" "LAS" "LAS" ...
##  $ valid            : POSIXct, format: "2015-12-31 00:56:00" "2015-12-31 01:56:00" ...
##  $ tmpf             : num  46.9 44.1 43 41 39.9 ...
##  $ dwpf             : num  18 17.1 16 16 17.1 ...
##  $ relh             : num  31.1 33.4 33.2 35.9 39.2 ...
##  $ drct             : num  100 10 10 340 210 230 230 0 0 210 ...
##  $ sknt             : num  3 5 5 5 3 3 3 0 0 3 ...
##  $ p01i             : chr  "0.00" "0.00" "0.00" "0.00" ...
##  $ alti             : num  30.1 30.1 30.1 30.1 30.1 ...
##  $ mslp             : num  1020 1020 1021 1022 1022 ...
##  $ vsby             : num  10 10 10 10 10 10 10 10 10 10 ...
##  $ gust             : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ skyc1            : chr  "CLR" "CLR" "CLR" "CLR" ...
##  $ skyc2            : chr  NA NA NA NA ...
##  $ skyc3            : chr  NA NA NA NA ...
##  $ skyc4            : chr  NA NA NA NA ...
##  $ skyl1            : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ skyl2            : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ skyl3            : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ skyl4            : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ wxcodes          : chr  NA NA NA NA ...
##  $ ice_accretion_1hr: logi  NA NA NA NA NA NA ...
##  $ ice_accretion_3hr: logi  NA NA NA NA NA NA ...
##  $ ice_accretion_6hr: logi  NA NA NA NA NA NA ...
##  $ peak_wind_gust   : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ peak_wind_drct   : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ peak_wind_time   : POSIXct, format: NA NA ...
##  $ feel             : num  45.8 40.7 39.4 37.1 37.7 ...
##  $ metar            : chr  "KLAS 310056Z 10003KT 10SM CLR 08/M08 A3009 RMK AO2 SLP196 T00831078" "KLAS 310156Z 01005KT 10SM CLR 07/M08 A3011 RMK AO2 SLP204 T00671083" "KLAS 310256Z 01005KT 10SM CLR 06/M09 A3013 RMK AO2 SLP210 T00611089 53011" "KLAS 310356Z 34005KT 10SM CLR 05/M09 A3014 RMK AO2 SLP215 T00501089" ...
## 
## *** OBSERVATIONS EXPECTED BUT NOT RECORDED ***
##  [1] "2016-06-17 23:56:00 UTC" "2016-06-18 00:56:00 UTC"
##  [3] "2016-07-13 14:56:00 UTC" "2016-07-13 15:56:00 UTC"
##  [5] "2016-07-13 16:56:00 UTC" "2016-07-13 17:56:00 UTC"
##  [7] "2016-08-05 07:56:00 UTC" "2016-08-12 17:56:00 UTC"
##  [9] "2016-08-23 14:56:00 UTC" "2016-08-24 00:56:00 UTC"
## [11] "2016-08-24 01:56:00 UTC" "2016-11-21 00:56:00 UTC"
## [13] "2016-12-03 09:56:00 UTC" "2016-12-03 10:56:00 UTC"
## 
## *** OBSERVATIONS RECORDED BUT NOT EXPECTED ***
## POSIXct of length 0
## 
## *** Are the extracted records unique? ***
## [1] TRUE
## 
## 
## *** First 6 winds and parsing ***
##      [,1]      [,2]  [,3] [,4]
## [1,] "10003KT" "100" "03" NA  
## [2,] "01005KT" "010" "05" NA  
## [3,] "01005KT" "010" "05" NA  
## [4,] "34005KT" "340" "05" NA  
## [5,] "21003KT" "210" "03" NA  
## [6,] "23003KT" "230" "03" NA  
## 
## *** Table of WIND DIRECTION ***
## 
##  000  010  020  030  040  050  060  070  080  090  100  110  120  130  140  150 
## 1397  152  172  161  162  196  164  167  172  119  101   79   51   49   70  100 
##  160  170  180  190  200  210  220  230  240  250  260  270  280  290  300  310 
##  186  297  436  629  594  543  512  348  200  135  118  118  112   81   51   59 
##  320  330  340  350  360  VRB <NA> 
##   76  145  126  113  104  504   19 
## 
## *** Table of WIND SPEED ***
## 
##   00   03   04   05   06   07   08   09   10   11   12   13   14   15   16   17 
## 1397 1349 1247  963  727  500  474  375  386  301  244  192  161  121   88   58 
##   18   19   20   21   22   23   24   25   26   27   28   29   31 <NA> 
##   67   35   41   28   14   12    7    3    2    2    3    1    1   19 
## 
## *** Table of WIND GUST ***
## 
##  G14  G15  G16  G17  G18  G19  G20  G21  G22  G23  G24  G25  G26  G27  G28  G29 
##   23   80   94  146  128  130  116  106  111   77   82   73   53   38   28   28 
##  G30  G31  G32  G33  G34  G35  G36  G37  G38  G39  G40  G41  G44  G45 <NA> 
##   23   20    9   11   13    6   10    3    1    5    1    1    2    2 7398 
## 
##  *** WIND DATA WAS NOT CAPTURED FROM: *** 
## # A tibble: 19 x 1
##    metar                                                                        
##    <chr>                                                                        
##  1 KLAS 042156Z 10SM FEW200 BKN250 26/M09 A2988 RMK AO2 SLP100 T02561089        
##  2 KLAS 161456Z 10SM FEW250 14/M06 A3010 RMK AO2 SLP180 T01441061 51021         
##  3 KLAS 022256Z 10SM FEW090 SCT250 26/M01 A2997 RMK AO2 SLP134 T02611006        
##  4 KLAS 032056Z 10SM FEW250 28/01 A3000 RMK AO2 SLP138 T02780006 57020          
##  5 KLAS 132156Z 10SM FEW120 36/M03 A2989 RMK AO2 SLP095 T03611033               
##  6 KLAS 151756Z 10SM FEW250 27/M01 A2970 RMK AO2 SLP030 T02671006 10272 20217 5~
##  7 KLAS 182156Z 10SM SCT130 SCT200 29/02 A2987 RMK AO2 SLP093 T02940022         
##  8 KLAS 282056Z 10SM SCT120 BKN250 32/M03 A2979 RMK AO2 SLP061 T03171028 56016  
##  9 KLAS 131856Z 10SM FEW120 32/01 A2993 RMK AO2 SLP108 T03170011                
## 10 KLAS 221756Z 10SM FEW180 FEW250 38/M01 A2984 RMK AO2 SLP069 T03781006 10383 ~
## 11 KLAS 281856Z 10SM FEW095 SCT130 SCT200 40/04 A2995 RMK AO2 SLP106 T04000044  
## 12 KLAS 012256Z 10SM FEW100 FEW160 SCT250 35/13 A2981 RMK AO2 SLP068 TCU SW AND~
## 13 KLAS 241656Z 9SM CLR 38/02 A2986 RMK AO2 SLP077 FU ALQDS VIS LWR S T03780022 
## 14 KLAS 011956Z 10SM SCT120 SCT250 37/16 A2990 RMK AO2 SLP095 TCU DSNT N AND W ~
## 15 KLAS 061856Z 10SM FEW100 FEW140 36/09 A2982 RMK AO2 SLP066 T03610094         
## 16 KLAS 112156Z 10SM FEW110 FEW200 37/00 A2992 RMK AO2 SLP100 CB DSNT SE ACC DS~
## 17 KLAS 222256Z 10SM SCT120 SCT200 36/04 A2978 RMK AO2 SLP055 CB DSNT SE ACC DS~
## 18 KLAS 092156Z 10SM FEW120 37/M01 A2993 RMK AO2 SLP106 T03721006               
## 19 KLAS 130456Z 10SM CLR 29/01 A2968 RMK AO2 SLP023 T02890006

## Warning: Removed 19 rows containing non-finite values (stat_count).

## Warning: Removed 1 rows containing missing values (geom_point).
## 
## *** Tentative Summary of Element Parsing *** 
## .
## FALSE  TRUE 
##    35  8783 
## 
## *** Data Not Matched *** 
##  [1] "KLAS 042156Z 10SM FEW200 BKN250 26/M09 A2988 RMK AO2 SLP100 T02561089"                                                                          
##  [2] "KLAS 161456Z 10SM FEW250 14/M06 A3010 RMK AO2 SLP180 T01441061 51021"                                                                           
##  [3] "KLAS 022256Z 10SM FEW090 SCT250 26/M01 A2997 RMK AO2 SLP134 T02611006"                                                                          
##  [4] "KLAS 032056Z 10SM FEW250 28/01 A3000 RMK AO2 SLP138 T02780006 57020"                                                                            
##  [5] "KLAS 132156Z 10SM FEW120 36/M03 A2989 RMK AO2 SLP095 T03611033"                                                                                 
##  [6] "KLAS 151756Z 10SM FEW250 27/M01 A2970 RMK AO2 SLP030 T02671006 10272 20217 51002"                                                               
##  [7] "KLAS 182156Z 10SM SCT130 SCT200 29/02 A2987 RMK AO2 SLP093 T02940022"                                                                           
##  [8] "KLAS 282056Z 10SM SCT120 BKN250 32/M03 A2979 RMK AO2 SLP061 T03171028 56016"                                                                    
##  [9] "KLAS 131856Z 10SM FEW120 32/01 A2993 RMK AO2 SLP108 T03170011"                                                                                  
## [10] "KLAS 221756Z 10SM FEW180 FEW250 38/M01 A2984 RMK AO2 SLP069 T03781006 10383 20306 50003"                                                        
## [11] "KLAS 281856Z 10SM FEW095 SCT130 SCT200 40/04 A2995 RMK AO2 SLP106 T04000044"                                                                    
## [12] "KLAS 012256Z 10SM FEW100 FEW160 SCT250 35/13 A2981 RMK AO2 SLP068 TCU SW AND DSNT S T03500133"                                                  
## [13] "KLAS 241656Z 9SM CLR 38/02 A2986 RMK AO2 SLP077 FU ALQDS VIS LWR S T03780022"                                                                   
## [14] "KLAS 011956Z 10SM SCT120 SCT250 37/16 A2990 RMK AO2 SLP095 TCU DSNT N AND W AND NW ACC DSNT S-SW T03670156"                                     
## [15] "KLAS 061856Z 10SM FEW100 FEW140 36/09 A2982 RMK AO2 SLP066 T03610094"                                                                           
## [16] "KLAS 112156Z 10SM FEW110 FEW200 37/00 A2992 RMK AO2 SLP100 CB DSNT SE ACC DSNT E T03720000"                                                     
## [17] "KLAS 222256Z 10SM SCT120 SCT200 36/04 A2978 RMK AO2 SLP055 CB DSNT SE ACC DSNT ALQDS T03610044"                                                 
## [18] "KLAS 231556Z 00000KT 10SM FEW150 SCT180 26/14 A3005 RMK SLPNO T02570144"                                                                        
## [19] "KLAS 231656Z 10003KT 10SM FEW180 28/14 A3006 RMK SLPNO T02790143"                                                                               
## [20] "KLAS 231756Z 13004KT 10SM FEW100 FEW180 27/14 A3006 RMK SLPNO T02730139"                                                                        
## [21] "KLAS 231856Z 00000KT 10SM FEW100 30/13 A3004 RMK SLPNO T03010127"                                                                               
## [22] "KLAS 231956Z 00000KT 10SM FEW100 31/13 A3002 RMK SLPNO T03130131"                                                                               
## [23] "KLAS 232056Z 03004KT 10SM FEW100 31/13 A2999 RMK SLPNO CB DSNT NE T03080131"                                                                    
## [24] "KLAS 232156Z 10005KT 10SM FEW100 32/13 A2996 RMK SLPNO TCU DSNT N AND NW T03160132"                                                             
## [25] "KLAS 232256Z 08005KT 10SM FEW100 32/13 A2994 RMK SLPNO TCU DSNT W-NW T03210134"                                                                 
## [26] "KLAS 232356Z 05006KT 10SM FEW100 FEW250 32/14 A2992 RMK SLPNO ACC W T03240136"                                                                  
## [27] "KLAS 240256Z 01007KT 10SM FEW100 FEW250 32/11 A2994 RMK AO2 SLPNO CB DSNT N T03220106 53004 $"                                                  
## [28] "KLAS 240356Z 05011KT 10SM FEW100 FEW250 32/11 A2997 RMK AO2 SLPNO T03170106 $"                                                                  
## [29] "KLAS 240456Z 02007KT 10SM SCT100 SCT250 31/12 A2998 RMK AO2 SLPNO T03060122 $"                                                                  
## [30] "KLAS 240556Z 34007KT 10SM SCT120 BKN160 BKN250 31/11 A3000 RMK AO2 SLPNO OCNL LTGIC DSNT NW T030600111 10339 20300 51018 $"                     
## [31] "KLAS 240656Z VRB03KT 10SM -TSRA FEW030 SCT095CB BKN150 27/13 A3001 RMK AO2 RAB40 TSB38 SLPNO OCNL LTGICCG W-N TS W-N MOV S P0003 T02720133 $"   
## [32] "KLAS 240756Z 26010KT 10SM TS FEW050 FEW110CB BKN150 27/13 A2999 RMK AO2 RAE29 SLPNO OCNL LTGICCG S-SW TS S-SW MOV S P0000 T02720133 403390267 $"
## [33] "KLAS 092156Z 10SM FEW120 37/M01 A2993 RMK AO2 SLP106 T03721006"                                                                                 
## [34] "KLAS 130456Z 10SM CLR 29/01 A2968 RMK AO2 SLP023 T02890006"                                                                                     
## [35] "KLAS 071456Z 02010KT 10SM BKN250 07/M13 A3011"                                                                                                  
## 
## *** Parsing matrix summary *** 
## [1] 8818   11
##      [,1]                                                         [,2]  [,3]
## [1,] "56Z 10003KT 10SM CLR 08/M08 A3009 RMK AO2 SLP196 T00831078" "100" "03"
## [2,] "56Z 01005KT 10SM CLR 07/M08 A3011 RMK AO2 SLP204 T00671083" "010" "05"
## [3,] "56Z 01005KT 10SM CLR 06/M09 A3013 RMK AO2 SLP210 T00611089" "010" "05"
## [4,] "56Z 34005KT 10SM CLR 05/M09 A3014 RMK AO2 SLP215 T00501089" "340" "05"
## [5,] "56Z 21003KT 10SM CLR 04/M08 A3015 RMK AO2 SLP216 T00441083" "210" "03"
## [6,] "56Z 23003KT 10SM CLR 03/M08 A3016 RMK AO2 SLP219 T00331083" "230" "03"
##      [,4] [,5] [,6]   [,7] [,8]  [,9]    [,10]    [,11]      
## [1,] NA   " "  "10SM" "08" "M08" "A3009" "SLP196" "T00831078"
## [2,] NA   " "  "10SM" "07" "M08" "A3011" "SLP204" "T00671083"
## [3,] NA   " "  "10SM" "06" "M09" "A3013" "SLP210" "T00611089"
## [4,] NA   " "  "10SM" "05" "M09" "A3014" "SLP215" "T00501089"
## [5,] NA   " "  "10SM" "04" "M08" "A3015" "SLP216" "T00441083"
## [6,] NA   " "  "10SM" "03" "M08" "A3016" "SLP219" "T00331083"
## 
## *** Summary of the parsed data *** 
## Observations: 8,818
## Variables: 13
## $ METAR      <chr> "56Z 10003KT 10SM CLR 08/M08 A3009 RMK AO2 SLP196 T00831...
## $ WindDir    <chr> "100", "010", "010", "340", "210", "230", "230", "000", ...
## $ WindSpeed  <chr> "03", "05", "05", "05", "03", "03", "03", "00", "00", "0...
## $ WindGust   <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, ...
## $ Dummy      <chr> " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", "...
## $ Visibility <chr> "10SM", "10SM", "10SM", "10SM", "10SM", "10SM", "10SM", ...
## $ TempC      <chr> "08", "07", "06", "05", "04", "03", "02", "02", "01", "0...
## $ DewC       <chr> "M08", "M08", "M09", "M09", "M08", "M08", "M09", "M09", ...
## $ Altimeter  <chr> "A3009", "A3011", "A3013", "A3014", "A3015", "A3016", "A...
## $ SLP        <chr> "SLP196", "SLP204", "SLP210", "SLP215", "SLP216", "SLP21...
## $ FahrC      <chr> "T00831078", "T00671083", "T00611089", "T00501089", "T00...
## $ dtime      <dttm> 2015-12-31 00:56:00, 2015-12-31 01:56:00, 2015-12-31 02...
## $ origMETAR  <chr> "KLAS 310056Z 10003KT 10SM CLR 08/M08 A3009 RMK AO2 SLP1...
## Warning: NAs introduced by coercion

## 
##  *** Parsed data structure, head, tail, and random sample *** 
## Classes 'tbl_df', 'tbl' and 'data.frame':    8818 obs. of  15 variables:
##  $ METAR     : chr  "56Z 10003KT 10SM CLR 08/M08 A3009 RMK AO2 SLP196 T00831078" "56Z 01005KT 10SM CLR 07/M08 A3011 RMK AO2 SLP204 T00671083" "56Z 01005KT 10SM CLR 06/M09 A3013 RMK AO2 SLP210 T00611089" "56Z 34005KT 10SM CLR 05/M09 A3014 RMK AO2 SLP215 T00501089" ...
##  $ WindDir   : chr  "100" "010" "010" "340" ...
##  $ WindSpeed : int  3 5 5 5 3 3 3 0 0 3 ...
##  $ WindGust  : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ Dummy     : chr  " " " " " " " " ...
##  $ Visibility: num  10 10 10 10 10 10 10 10 10 10 ...
##  $ TempC     : int  8 7 6 5 4 3 2 2 1 1 ...
##  $ DewC      : int  -8 -8 -9 -9 -8 -8 -9 -9 -8 -8 ...
##  $ Altimeter : int  3009 3011 3013 3014 3015 3016 3017 3017 3018 3019 ...
##  $ SLP       : int  196 204 210 215 216 219 221 222 226 229 ...
##  $ FahrC     : chr  "T00831078" "T00671083" "T00611089" "T00501089" ...
##  $ dtime     : POSIXct, format: "2015-12-31 00:56:00" "2015-12-31 01:56:00" ...
##  $ origMETAR : chr  "KLAS 310056Z 10003KT 10SM CLR 08/M08 A3009 RMK AO2 SLP196 T00831078" "KLAS 310156Z 01005KT 10SM CLR 07/M08 A3011 RMK AO2 SLP204 T00671083" "KLAS 310256Z 01005KT 10SM CLR 06/M09 A3013 RMK AO2 SLP210 T00611089 53011" "KLAS 310356Z 34005KT 10SM CLR 05/M09 A3014 RMK AO2 SLP215 T00501089" ...
##  $ TempF     : num  46.9 44.1 43 41 39.9 ...
##  $ DewF      : num  18 17.1 16 16 17.1 ...
## # A tibble: 6 x 15
##   METAR WindDir WindSpeed WindGust Dummy Visibility TempC  DewC Altimeter   SLP
##   <chr> <chr>       <int>    <dbl> <chr>      <dbl> <int> <int>     <int> <int>
## 1 56Z ~ 100             3       NA " "           10     8    -8      3009   196
## 2 56Z ~ 010             5       NA " "           10     7    -8      3011   204
## 3 56Z ~ 010             5       NA " "           10     6    -9      3013   210
## 4 56Z ~ 340             5       NA " "           10     5    -9      3014   215
## 5 56Z ~ 210             3       NA " "           10     4    -8      3015   216
## 6 56Z ~ 230             3       NA " "           10     3    -8      3016   219
## # ... with 5 more variables: FahrC <chr>, dtime <dttm>, origMETAR <chr>,
## #   TempF <dbl>, DewF <dbl>
## # A tibble: 6 x 15
##   METAR WindDir WindSpeed WindGust Dummy Visibility TempC  DewC Altimeter   SLP
##   <chr> <chr>       <int>    <dbl> <chr>      <dbl> <int> <int>     <int> <int>
## 1 56Z ~ 000             0       NA " "           10    12     4      2979    84
## 2 56Z ~ 170             4       NA " "           10    13     3      2975    71
## 3 56Z ~ 150             6       NA " "           10    14     3      2973    62
## 4 56Z ~ VRB             5       NA " "           10    15     2      2970    50
## 5 56Z ~ 180            10       NA " "           10    15     2      2970    51
## 6 56Z ~ 180            11       NA " "           10    13     3      2971    56
## # ... with 5 more variables: FahrC <chr>, dtime <dttm>, origMETAR <chr>,
## #   TempF <dbl>, DewF <dbl>
## # A tibble: 20 x 15
##    METAR WindDir WindSpeed WindGust Dummy Visibility TempC  DewC Altimeter   SLP
##    <chr> <chr>       <int>    <dbl> <chr>      <dbl> <int> <int>     <int> <int>
##  1 56Z ~ 220            11       NA " "           10    41    -3      2971    25
##  2 56Z ~ 000             0       NA " "           10    19     2      2975    58
##  3 56Z ~ 290             4       NA " "           10    12    -8      2988   112
##  4 56Z ~ 220            13       NA " "           10    19    -4      2958   997
##  5 56Z ~ 010             3       NA " "           10    13     2      3011   192
##  6 56Z ~ 200             5       NA " "           10    31    -4      2981    61
##  7 56Z ~ 000             0       NA " "           10    22    14      3002   139
##  8 56Z ~ 000             0       NA " "           10    10    -9      3032   273
##  9 56Z ~ 260             4       NA " "           10    28     1      2997   117
## 10 56Z ~ 220             3       NA " "           10    28    -4      2993   111
## 11 56Z ~ 120             4       NA " "           10    28    -4      2996   126
## 12 <NA>  <NA>           NA       NA  <NA>         NA    NA    NA        NA    NA
## 13 56Z ~ 000             0       NA " "            9     8     4      3006   176
## 14 56Z ~ 230            10       NA " "           10    30    -1      2987    91
## 15 56Z ~ 000             0       NA " "           10    10     4      2999   149
## 16 56Z ~ 210             5       NA " "           10    32    -7      2978    51
## 17 56Z ~ 230             5       NA " "           10    16     6      2996   129
## 18 56Z ~ 200             4       NA " "           10     5     0      2982    97
## 19 56Z ~ 180             3       NA " "            6     8     7      2996   145
## 20 56Z ~ 200             3       NA " "           10    28    -1      2992    98
## # ... with 5 more variables: FahrC <chr>, dtime <dttm>, origMETAR <chr>,
## #   TempF <dbl>, DewF <dbl>
## 
##  *** Number of NA values *** 
##      METAR    WindDir  WindSpeed   WindGust      Dummy Visibility      TempC 
##         35         35         35       8818         35         35         35 
##       DewC  Altimeter        SLP      FahrC      dtime  origMETAR      TempF 
##         35         35         35         35          0          0         35 
##       DewF 
##         35

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf
## Warning in min(diff(sort(x))): no non-missing arguments to min; returning Inf
## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## # A tibble: 9 x 2
##   Visibility     n
##        <dbl> <int>
## 1          3     4
## 2          4     8
## 3          5    11
## 4          6    13
## 5          7    16
## 6          8    26
## 7          9    42
## 8         10  8663
## 9         NA    35
##    WindGust    n
## 1        14   20
## 2        15   77
## 3        16   91
## 4        17  146
## 5        18  126
## 6        19  126
## 7        20  116
## 8        21  106
## 9        22  111
## 10       23   77
## 11       24   82
## 12       25   73
## 13       26   53
## 14       27   38
## 15       28   28
## 16       29   28
## 17       30   23
## 18       31   20
## 19       32    9
## 20       33   11
## 21       34   13
## 22       35    6
## 23       36   10
## 24       37    3
## 25       38    1
## 26       39    5
## 27       40    1
## 28       41    1
## 29       44    2
## 30       45    2
## 31       NA 7413
## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## 
##  *** Correlations use 8783 complete cases (99.6% of 8818 total) ***
##            TempC TempF  DewC  DewF Altimeter modSLP WindSpeed Visibility
## TempC       1.00  1.00  0.22  0.22     -0.51  -0.62      0.22       0.01
## TempF       1.00  1.00  0.22  0.22     -0.51  -0.62      0.22       0.01
## DewC        0.22  0.22  1.00  1.00     -0.24  -0.27     -0.04      -0.13
## DewF        0.22  0.22  1.00  1.00     -0.24  -0.27     -0.04      -0.13
## Altimeter  -0.51 -0.51 -0.24 -0.24      1.00   0.99     -0.38       0.06
## modSLP     -0.62 -0.62 -0.27 -0.27      0.99   1.00     -0.38       0.06
## WindSpeed   0.22  0.22 -0.04 -0.04     -0.38  -0.38      1.00      -0.02
## Visibility  0.01  0.01 -0.13 -0.13      0.06   0.06     -0.02       1.00

## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

## 
##  *** Regression call is: modSLP ~ Altimeter ***
## 
## Call:
## lm(formula = formula(myChar), data = met)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.1381 -0.7848 -0.0607  0.6891  3.6362 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -1.163e+02  1.730e+00  -67.24   <2e-16 ***
## Altimeter    3.769e-01  5.776e-04  652.51   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9753 on 8781 degrees of freedom
##   (35 observations deleted due to missingness)
## Multiple R-squared:  0.9798, Adjusted R-squared:  0.9798 
## F-statistic: 4.258e+05 on 1 and 8781 DF,  p-value: < 2.2e-16
## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

## 
##  *** Regression call is: modSLP ~ Altimeter + TempF ***
## 
## Call:
## lm(formula = formula(myChar), data = met)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.18161 -0.33113  0.02395  0.33395  1.08509 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -2.537e+01  8.693e-01  -29.18   <2e-16 ***
## Altimeter    3.478e-01  2.868e-04 1212.89   <2e-16 ***
## TempF       -5.581e-02  2.813e-04 -198.42   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4165 on 8780 degrees of freedom
##   (35 observations deleted due to missingness)
## Multiple R-squared:  0.9963, Adjusted R-squared:  0.9963 
## F-statistic: 1.187e+06 on 2 and 8780 DF,  p-value: < 2.2e-16
## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

## 
## No Records with a cloud type of vertical visibility (VV)
## 
## *** Counts by number of layers of each cloud type ***
##    isCLR isVV numFEW numSCT numBKN numOVC    n
## 1      0    0      0      0      0      1  144
## 2      0    0      0      0      1      0  635
## 3      0    0      0      0      1      1  107
## 4      0    0      0      0      2      0  139
## 5      0    0      0      0      2      1   39
## 6      0    0      0      0      3      0   13
## 7      0    0      0      0      4      0    1
## 8      0    0      0      1      0      0  590
## 9      0    0      0      1      0      1   54
## 10     0    0      0      1      1      0  242
## 11     0    0      0      1      1      1   51
## 12     0    0      0      1      2      0   88
## 13     0    0      0      1      2      1    1
## 14     0    0      0      1      3      0    1
## 15     0    0      0      2      0      0  134
## 16     0    0      0      2      0      1    7
## 17     0    0      0      2      1      0   37
## 18     0    0      0      2      2      0    4
## 19     0    0      0      3      0      0    4
## 20     0    0      0      3      1      0    2
## 21     0    0      1      0      0      0 1933
## 22     0    0      1      0      0      1   99
## 23     0    0      1      0      1      0  454
## 24     0    0      1      0      1      1   92
## 25     0    0      1      0      2      0  140
## 26     0    0      1      0      2      1    2
## 27     0    0      1      0      3      0    2
## 28     0    0      1      1      0      0  330
## 29     0    0      1      1      0      1   34
## 30     0    0      1      1      1      0  212
## 31     0    0      1      1      1      1   17
## 32     0    0      1      1      2      0   26
## 33     0    0      1      2      0      0   53
## 34     0    0      1      2      0      1    1
## 35     0    0      1      2      1      0    9
## 36     0    0      1      3      0      0    1
## 37     0    0      2      0      0      0  533
## 38     0    0      2      0      0      1   18
## 39     0    0      2      0      1      0   64
## 40     0    0      2      0      1      1    2
## 41     0    0      2      0      2      0    3
## 42     0    0      2      1      0      0   75
## 43     0    0      2      1      0      1    1
## 44     0    0      2      1      1      0    5
## 45     0    0      2      2      0      0    3
## 46     0    0      3      0      0      0    4
## 47     0    0      3      0      0      1    1
## 48     0    0      3      0      1      0    1
## 49     0    0      3      1      0      0    2
## 50     0    0      3      1      1      0    1
## 51     1    0      0      0      0      0 2407
## 
## *** METAR records where no clouds were extracted ***
## character(0)
## 
## *** Dimensions for the cloud matrix ***
## [1] 8818   12
## List of 2
##  $ tblClouds:Classes 'tbl_df', 'tbl' and 'data.frame':   8818 obs. of  7 variables:
##   ..$ isCLR : num [1:8818] 1 1 1 1 1 1 1 1 1 1 ...
##   ..$ isVV  : num [1:8818] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ htVV  : logi [1:8818] NA NA NA NA NA NA ...
##   ..$ numFEW: int [1:8818] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ numSCT: int [1:8818] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ numBKN: int [1:8818] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ numOVC: int [1:8818] 0 0 0 0 0 0 0 0 0 0 ...
##  $ mtxCloud : chr [1:8818, 1:12] "" "" "" "" ...
## Warning in min(isKey): no non-missing arguments to min; returning Inf
## Warning in stri_replace_first_regex(string, pattern,
## fix_replacement(replacement), : NAs introduced by coercion to integer range

## 
## *** Lowest clouds by type tibble ***
## # A tibble: 8,818 x 5
##    lowVV lowOVC lowBKN lowSCT lowFEW
##    <dbl>  <dbl>  <dbl>  <dbl>  <dbl>
##  1    NA     NA     NA     NA     NA
##  2    NA     NA     NA     NA     NA
##  3    NA     NA     NA     NA     NA
##  4    NA     NA     NA     NA     NA
##  5    NA     NA     NA     NA     NA
##  6    NA     NA     NA     NA     NA
##  7    NA     NA     NA     NA     NA
##  8    NA     NA     NA     NA     NA
##  9    NA     NA     NA     NA     NA
## 10    NA     NA     NA     NA     NA
## # ... with 8,808 more rows

## List of 3
##  $ lowCloud       :Classes 'tbl_df', 'tbl' and 'data.frame': 8818 obs. of  5 variables:
##   ..$ lowVV : num [1:8818] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ lowOVC: num [1:8818] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ lowBKN: num [1:8818] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ lowSCT: num [1:8818] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ lowFEW: num [1:8818] NA NA NA NA NA NA NA NA NA NA ...
##  $ minCeilingLevel: num [1:8818] 1e+06 1e+06 1e+06 1e+06 1e+06 ...
##  $ minCloudLevel  : num [1:8818] 1e+06 1e+06 1e+06 1e+06 1e+06 ...
## Classes 'tbl_df', 'tbl' and 'data.frame':    8818 obs. of  30 variables:
##  $ METAR     : chr  "56Z 10003KT 10SM CLR 08/M08 A3009 RMK AO2 SLP196 T00831078" "56Z 01005KT 10SM CLR 07/M08 A3011 RMK AO2 SLP204 T00671083" "56Z 01005KT 10SM CLR 06/M09 A3013 RMK AO2 SLP210 T00611089" "56Z 34005KT 10SM CLR 05/M09 A3014 RMK AO2 SLP215 T00501089" ...
##  $ WindDir   : chr  "100" "010" "010" "340" ...
##  $ WindSpeed : int  3 5 5 5 3 3 3 0 0 3 ...
##  $ WindGust  : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ Dummy     : chr  " " " " " " " " ...
##  $ Visibility: num  10 10 10 10 10 10 10 10 10 10 ...
##  $ TempC     : int  8 7 6 5 4 3 2 2 1 1 ...
##  $ DewC      : int  -8 -8 -9 -9 -8 -8 -9 -9 -8 -8 ...
##  $ Altimeter : int  3009 3011 3013 3014 3015 3016 3017 3017 3018 3019 ...
##  $ SLP       : int  196 204 210 215 216 219 221 222 226 229 ...
##  $ FahrC     : chr  "T00831078" "T00671083" "T00611089" "T00501089" ...
##  $ dtime     : POSIXct, format: "2015-12-31 00:56:00" "2015-12-31 01:56:00" ...
##  $ origMETAR : chr  "KLAS 310056Z 10003KT 10SM CLR 08/M08 A3009 RMK AO2 SLP196 T00831078" "KLAS 310156Z 01005KT 10SM CLR 07/M08 A3011 RMK AO2 SLP204 T00671083" "KLAS 310256Z 01005KT 10SM CLR 06/M09 A3013 RMK AO2 SLP210 T00611089 53011" "KLAS 310356Z 34005KT 10SM CLR 05/M09 A3014 RMK AO2 SLP215 T00501089" ...
##  $ TempF     : num  46.9 44.1 43 41 39.9 ...
##  $ DewF      : num  18 17.1 16 16 17.1 ...
##  $ modSLP    : num  1020 1020 1021 1022 1022 ...
##  $ isCLR     : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ isVV      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ htVV      : logi  NA NA NA NA NA NA ...
##  $ numFEW    : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ numSCT    : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ numBKN    : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ numOVC    : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ lowVV     : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ lowOVC    : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ lowBKN    : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ lowSCT    : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ lowFEW    : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ wType     : Factor w/ 7 levels "VV","OVC","BKN",..: 6 6 6 6 6 6 6 6 6 6 ...
##  $ month     : Factor w/ 12 levels "Jan","Feb","Mar",..: 12 12 12 12 12 12 12 12 12 12 ...

str(klas2016METAR)
## List of 8
##  $ fullMETAR      :Classes 'tbl_df', 'tbl' and 'data.frame': 8818 obs. of  30 variables:
##   ..$ METAR     : chr [1:8818] "56Z 10003KT 10SM CLR 08/M08 A3009 RMK AO2 SLP196 T00831078" "56Z 01005KT 10SM CLR 07/M08 A3011 RMK AO2 SLP204 T00671083" "56Z 01005KT 10SM CLR 06/M09 A3013 RMK AO2 SLP210 T00611089" "56Z 34005KT 10SM CLR 05/M09 A3014 RMK AO2 SLP215 T00501089" ...
##   ..$ WindDir   : chr [1:8818] "100" "010" "010" "340" ...
##   ..$ WindSpeed : int [1:8818] 3 5 5 5 3 3 3 0 0 3 ...
##   ..$ WindGust  : num [1:8818] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ Dummy     : chr [1:8818] " " " " " " " " ...
##   ..$ Visibility: num [1:8818] 10 10 10 10 10 10 10 10 10 10 ...
##   ..$ TempC     : int [1:8818] 8 7 6 5 4 3 2 2 1 1 ...
##   ..$ DewC      : int [1:8818] -8 -8 -9 -9 -8 -8 -9 -9 -8 -8 ...
##   ..$ Altimeter : int [1:8818] 3009 3011 3013 3014 3015 3016 3017 3017 3018 3019 ...
##   ..$ SLP       : int [1:8818] 196 204 210 215 216 219 221 222 226 229 ...
##   ..$ FahrC     : chr [1:8818] "T00831078" "T00671083" "T00611089" "T00501089" ...
##   ..$ dtime     : POSIXct[1:8818], format: "2015-12-31 00:56:00" "2015-12-31 01:56:00" ...
##   ..$ origMETAR : chr [1:8818] "KLAS 310056Z 10003KT 10SM CLR 08/M08 A3009 RMK AO2 SLP196 T00831078" "KLAS 310156Z 01005KT 10SM CLR 07/M08 A3011 RMK AO2 SLP204 T00671083" "KLAS 310256Z 01005KT 10SM CLR 06/M09 A3013 RMK AO2 SLP210 T00611089 53011" "KLAS 310356Z 34005KT 10SM CLR 05/M09 A3014 RMK AO2 SLP215 T00501089" ...
##   ..$ TempF     : num [1:8818] 46.9 44.1 43 41 39.9 ...
##   ..$ DewF      : num [1:8818] 18 17.1 16 16 17.1 ...
##   ..$ modSLP    : num [1:8818] 1020 1020 1021 1022 1022 ...
##   ..$ isCLR     : num [1:8818] 1 1 1 1 1 1 1 1 1 1 ...
##   ..$ isVV      : num [1:8818] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ htVV      : logi [1:8818] NA NA NA NA NA NA ...
##   ..$ numFEW    : int [1:8818] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ numSCT    : int [1:8818] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ numBKN    : int [1:8818] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ numOVC    : int [1:8818] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ lowVV     : num [1:8818] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ lowOVC    : num [1:8818] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ lowBKN    : num [1:8818] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ lowSCT    : num [1:8818] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ lowFEW    : num [1:8818] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ wType     : Factor w/ 7 levels "VV","OVC","BKN",..: 6 6 6 6 6 6 6 6 6 6 ...
##   ..$ month     : Factor w/ 12 levels "Jan","Feb","Mar",..: 12 12 12 12 12 12 12 12 12 12 ...
##  $ funcMETAR      :Classes 'tbl_df', 'tbl' and 'data.frame': 8818 obs. of  29 variables:
##   ..$ station          : chr [1:8818] "LAS" "LAS" "LAS" "LAS" ...
##   ..$ valid            : POSIXct[1:8818], format: "2015-12-31 00:56:00" "2015-12-31 01:56:00" ...
##   ..$ tmpf             : num [1:8818] 46.9 44.1 43 41 39.9 ...
##   ..$ dwpf             : num [1:8818] 18 17.1 16 16 17.1 ...
##   ..$ relh             : num [1:8818] 31.1 33.4 33.2 35.9 39.2 ...
##   ..$ drct             : num [1:8818] 100 10 10 340 210 230 230 0 0 210 ...
##   ..$ sknt             : num [1:8818] 3 5 5 5 3 3 3 0 0 3 ...
##   ..$ p01i             : chr [1:8818] "0.00" "0.00" "0.00" "0.00" ...
##   ..$ alti             : num [1:8818] 30.1 30.1 30.1 30.1 30.1 ...
##   ..$ mslp             : num [1:8818] 1020 1020 1021 1022 1022 ...
##   ..$ vsby             : num [1:8818] 10 10 10 10 10 10 10 10 10 10 ...
##   ..$ gust             : num [1:8818] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ skyc1            : chr [1:8818] "CLR" "CLR" "CLR" "CLR" ...
##   ..$ skyc2            : chr [1:8818] NA NA NA NA ...
##   ..$ skyc3            : chr [1:8818] NA NA NA NA ...
##   ..$ skyc4            : chr [1:8818] NA NA NA NA ...
##   ..$ skyl1            : num [1:8818] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ skyl2            : num [1:8818] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ skyl3            : num [1:8818] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ skyl4            : num [1:8818] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ wxcodes          : chr [1:8818] NA NA NA NA ...
##   ..$ ice_accretion_1hr: logi [1:8818] NA NA NA NA NA NA ...
##   ..$ ice_accretion_3hr: logi [1:8818] NA NA NA NA NA NA ...
##   ..$ ice_accretion_6hr: logi [1:8818] NA NA NA NA NA NA ...
##   ..$ peak_wind_gust   : num [1:8818] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ peak_wind_drct   : num [1:8818] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ peak_wind_time   : POSIXct[1:8818], format: NA NA ...
##   ..$ feel             : num [1:8818] 45.8 40.7 39.4 37.1 37.7 ...
##   ..$ metar            : chr [1:8818] "KLAS 310056Z 10003KT 10SM CLR 08/M08 A3009 RMK AO2 SLP196 T00831078" "KLAS 310156Z 01005KT 10SM CLR 07/M08 A3011 RMK AO2 SLP204 T00671083" "KLAS 310256Z 01005KT 10SM CLR 06/M09 A3013 RMK AO2 SLP210 T00611089 53011" "KLAS 310356Z 34005KT 10SM CLR 05/M09 A3014 RMK AO2 SLP215 T00501089" ...
##   ..- attr(*, "spec")=
##   .. .. cols(
##   .. ..   station = col_character(),
##   .. ..   valid = col_datetime(format = ""),
##   .. ..   tmpf = col_double(),
##   .. ..   dwpf = col_double(),
##   .. ..   relh = col_double(),
##   .. ..   drct = col_double(),
##   .. ..   sknt = col_double(),
##   .. ..   p01i = col_character(),
##   .. ..   alti = col_double(),
##   .. ..   mslp = col_double(),
##   .. ..   vsby = col_double(),
##   .. ..   gust = col_double(),
##   .. ..   skyc1 = col_character(),
##   .. ..   skyc2 = col_character(),
##   .. ..   skyc3 = col_character(),
##   .. ..   skyc4 = col_character(),
##   .. ..   skyl1 = col_double(),
##   .. ..   skyl2 = col_double(),
##   .. ..   skyl3 = col_double(),
##   .. ..   skyl4 = col_double(),
##   .. ..   wxcodes = col_character(),
##   .. ..   ice_accretion_1hr = col_logical(),
##   .. ..   ice_accretion_3hr = col_logical(),
##   .. ..   ice_accretion_6hr = col_logical(),
##   .. ..   peak_wind_gust = col_double(),
##   .. ..   peak_wind_drct = col_double(),
##   .. ..   peak_wind_time = col_datetime(format = ""),
##   .. ..   feel = col_double(),
##   .. ..   metar = col_character()
##   .. .. )
##  $ windMETAR      :Classes 'tbl_df', 'tbl' and 'data.frame': 8818 obs. of  32 variables:
##   ..$ station          : chr [1:8818] "LAS" "LAS" "LAS" "LAS" ...
##   ..$ valid            : POSIXct[1:8818], format: "2015-12-31 00:56:00" "2015-12-31 01:56:00" ...
##   ..$ tmpf             : num [1:8818] 46.9 44.1 43 41 39.9 ...
##   ..$ dwpf             : num [1:8818] 18 17.1 16 16 17.1 ...
##   ..$ relh             : num [1:8818] 31.1 33.4 33.2 35.9 39.2 ...
##   ..$ drct             : num [1:8818] 100 10 10 340 210 230 230 0 0 210 ...
##   ..$ sknt             : num [1:8818] 3 5 5 5 3 3 3 0 0 3 ...
##   ..$ p01i             : chr [1:8818] "0.00" "0.00" "0.00" "0.00" ...
##   ..$ alti             : num [1:8818] 30.1 30.1 30.1 30.1 30.1 ...
##   ..$ mslp             : num [1:8818] 1020 1020 1021 1022 1022 ...
##   ..$ vsby             : num [1:8818] 10 10 10 10 10 10 10 10 10 10 ...
##   ..$ gust             : num [1:8818] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ skyc1            : chr [1:8818] "CLR" "CLR" "CLR" "CLR" ...
##   ..$ skyc2            : chr [1:8818] NA NA NA NA ...
##   ..$ skyc3            : chr [1:8818] NA NA NA NA ...
##   ..$ skyc4            : chr [1:8818] NA NA NA NA ...
##   ..$ skyl1            : num [1:8818] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ skyl2            : num [1:8818] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ skyl3            : num [1:8818] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ skyl4            : num [1:8818] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ wxcodes          : chr [1:8818] NA NA NA NA ...
##   ..$ ice_accretion_1hr: logi [1:8818] NA NA NA NA NA NA ...
##   ..$ ice_accretion_3hr: logi [1:8818] NA NA NA NA NA NA ...
##   ..$ ice_accretion_6hr: logi [1:8818] NA NA NA NA NA NA ...
##   ..$ peak_wind_gust   : num [1:8818] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ peak_wind_drct   : num [1:8818] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ peak_wind_time   : POSIXct[1:8818], format: NA NA ...
##   ..$ feel             : num [1:8818] 45.8 40.7 39.4 37.1 37.7 ...
##   ..$ metar            : chr [1:8818] "KLAS 310056Z 10003KT 10SM CLR 08/M08 A3009 RMK AO2 SLP196 T00831078" "KLAS 310156Z 01005KT 10SM CLR 07/M08 A3011 RMK AO2 SLP204 T00671083" "KLAS 310256Z 01005KT 10SM CLR 06/M09 A3013 RMK AO2 SLP210 T00611089 53011" "KLAS 310356Z 34005KT 10SM CLR 05/M09 A3014 RMK AO2 SLP215 T00501089" ...
##   ..$ dirW             : chr [1:8818] "100" "010" "010" "340" ...
##   ..$ spdW             : num [1:8818] 3 5 5 5 3 3 3 0 0 3 ...
##   ..$ gustW            : num [1:8818] NA NA NA NA NA NA NA NA NA NA ...
##  $ initMETAR      :Classes 'tbl_df', 'tbl' and 'data.frame': 8818 obs. of  13 variables:
##   ..$ METAR     : chr [1:8818] "56Z 10003KT 10SM CLR 08/M08 A3009 RMK AO2 SLP196 T00831078" "56Z 01005KT 10SM CLR 07/M08 A3011 RMK AO2 SLP204 T00671083" "56Z 01005KT 10SM CLR 06/M09 A3013 RMK AO2 SLP210 T00611089" "56Z 34005KT 10SM CLR 05/M09 A3014 RMK AO2 SLP215 T00501089" ...
##   ..$ WindDir   : chr [1:8818] "100" "010" "010" "340" ...
##   ..$ WindSpeed : chr [1:8818] "03" "05" "05" "05" ...
##   ..$ WindGust  : chr [1:8818] NA NA NA NA ...
##   ..$ Dummy     : chr [1:8818] " " " " " " " " ...
##   ..$ Visibility: chr [1:8818] "10SM" "10SM" "10SM" "10SM" ...
##   ..$ TempC     : chr [1:8818] "08" "07" "06" "05" ...
##   ..$ DewC      : chr [1:8818] "M08" "M08" "M09" "M09" ...
##   ..$ Altimeter : chr [1:8818] "A3009" "A3011" "A3013" "A3014" ...
##   ..$ SLP       : chr [1:8818] "SLP196" "SLP204" "SLP210" "SLP215" ...
##   ..$ FahrC     : chr [1:8818] "T00831078" "T00671083" "T00611089" "T00501089" ...
##   ..$ dtime     : POSIXct[1:8818], format: "2015-12-31 00:56:00" "2015-12-31 01:56:00" ...
##   ..$ origMETAR : chr [1:8818] "KLAS 310056Z 10003KT 10SM CLR 08/M08 A3009 RMK AO2 SLP196 T00831078" "KLAS 310156Z 01005KT 10SM CLR 07/M08 A3011 RMK AO2 SLP204 T00671083" "KLAS 310256Z 01005KT 10SM CLR 06/M09 A3013 RMK AO2 SLP210 T00611089 53011" "KLAS 310356Z 34005KT 10SM CLR 05/M09 A3014 RMK AO2 SLP215 T00501089" ...
##  $ convMETAR      :Classes 'tbl_df', 'tbl' and 'data.frame': 8818 obs. of  15 variables:
##   ..$ METAR     : chr [1:8818] "56Z 10003KT 10SM CLR 08/M08 A3009 RMK AO2 SLP196 T00831078" "56Z 01005KT 10SM CLR 07/M08 A3011 RMK AO2 SLP204 T00671083" "56Z 01005KT 10SM CLR 06/M09 A3013 RMK AO2 SLP210 T00611089" "56Z 34005KT 10SM CLR 05/M09 A3014 RMK AO2 SLP215 T00501089" ...
##   ..$ WindDir   : chr [1:8818] "100" "010" "010" "340" ...
##   ..$ WindSpeed : int [1:8818] 3 5 5 5 3 3 3 0 0 3 ...
##   ..$ WindGust  : num [1:8818] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ Dummy     : chr [1:8818] " " " " " " " " ...
##   ..$ Visibility: num [1:8818] 10 10 10 10 10 10 10 10 10 10 ...
##   ..$ TempC     : int [1:8818] 8 7 6 5 4 3 2 2 1 1 ...
##   ..$ DewC      : int [1:8818] -8 -8 -9 -9 -8 -8 -9 -9 -8 -8 ...
##   ..$ Altimeter : int [1:8818] 3009 3011 3013 3014 3015 3016 3017 3017 3018 3019 ...
##   ..$ SLP       : int [1:8818] 196 204 210 215 216 219 221 222 226 229 ...
##   ..$ FahrC     : chr [1:8818] "T00831078" "T00671083" "T00611089" "T00501089" ...
##   ..$ dtime     : POSIXct[1:8818], format: "2015-12-31 00:56:00" "2015-12-31 01:56:00" ...
##   ..$ origMETAR : chr [1:8818] "KLAS 310056Z 10003KT 10SM CLR 08/M08 A3009 RMK AO2 SLP196 T00831078" "KLAS 310156Z 01005KT 10SM CLR 07/M08 A3011 RMK AO2 SLP204 T00671083" "KLAS 310256Z 01005KT 10SM CLR 06/M09 A3013 RMK AO2 SLP210 T00611089 53011" "KLAS 310356Z 34005KT 10SM CLR 05/M09 A3014 RMK AO2 SLP215 T00501089" ...
##   ..$ TempF     : num [1:8818] 46.9 44.1 43 41 39.9 ...
##   ..$ DewF      : num [1:8818] 18 17.1 16 16 17.1 ...
##  $ parseMETAR     :Classes 'tbl_df', 'tbl' and 'data.frame': 8818 obs. of  16 variables:
##   ..$ METAR     : chr [1:8818] "56Z 10003KT 10SM CLR 08/M08 A3009 RMK AO2 SLP196 T00831078" "56Z 01005KT 10SM CLR 07/M08 A3011 RMK AO2 SLP204 T00671083" "56Z 01005KT 10SM CLR 06/M09 A3013 RMK AO2 SLP210 T00611089" "56Z 34005KT 10SM CLR 05/M09 A3014 RMK AO2 SLP215 T00501089" ...
##   ..$ WindDir   : chr [1:8818] "100" "010" "010" "340" ...
##   ..$ WindSpeed : int [1:8818] 3 5 5 5 3 3 3 0 0 3 ...
##   ..$ WindGust  : num [1:8818] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ Dummy     : chr [1:8818] " " " " " " " " ...
##   ..$ Visibility: num [1:8818] 10 10 10 10 10 10 10 10 10 10 ...
##   ..$ TempC     : int [1:8818] 8 7 6 5 4 3 2 2 1 1 ...
##   ..$ DewC      : int [1:8818] -8 -8 -9 -9 -8 -8 -9 -9 -8 -8 ...
##   ..$ Altimeter : int [1:8818] 3009 3011 3013 3014 3015 3016 3017 3017 3018 3019 ...
##   ..$ SLP       : int [1:8818] 196 204 210 215 216 219 221 222 226 229 ...
##   ..$ FahrC     : chr [1:8818] "T00831078" "T00671083" "T00611089" "T00501089" ...
##   ..$ dtime     : POSIXct[1:8818], format: "2015-12-31 00:56:00" "2015-12-31 01:56:00" ...
##   ..$ origMETAR : chr [1:8818] "KLAS 310056Z 10003KT 10SM CLR 08/M08 A3009 RMK AO2 SLP196 T00831078" "KLAS 310156Z 01005KT 10SM CLR 07/M08 A3011 RMK AO2 SLP204 T00671083" "KLAS 310256Z 01005KT 10SM CLR 06/M09 A3013 RMK AO2 SLP210 T00611089 53011" "KLAS 310356Z 34005KT 10SM CLR 05/M09 A3014 RMK AO2 SLP215 T00501089" ...
##   ..$ TempF     : num [1:8818] 46.9 44.1 43 41 39.9 ...
##   ..$ DewF      : num [1:8818] 18 17.1 16 16 17.1 ...
##   ..$ modSLP    : num [1:8818] 1020 1020 1021 1022 1022 ...
##  $ initClouds     :List of 2
##   ..$ tblClouds:Classes 'tbl_df', 'tbl' and 'data.frame':    8818 obs. of  7 variables:
##   .. ..$ isCLR : num [1:8818] 1 1 1 1 1 1 1 1 1 1 ...
##   .. ..$ isVV  : num [1:8818] 0 0 0 0 0 0 0 0 0 0 ...
##   .. ..$ htVV  : logi [1:8818] NA NA NA NA NA NA ...
##   .. ..$ numFEW: int [1:8818] 0 0 0 0 0 0 0 0 0 0 ...
##   .. ..$ numSCT: int [1:8818] 0 0 0 0 0 0 0 0 0 0 ...
##   .. ..$ numBKN: int [1:8818] 0 0 0 0 0 0 0 0 0 0 ...
##   .. ..$ numOVC: int [1:8818] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ mtxCloud : chr [1:8818, 1:12] "" "" "" "" ...
##  $ processedClouds:List of 3
##   ..$ lowCloud       :Classes 'tbl_df', 'tbl' and 'data.frame':  8818 obs. of  5 variables:
##   .. ..$ lowVV : num [1:8818] NA NA NA NA NA NA NA NA NA NA ...
##   .. ..$ lowOVC: num [1:8818] NA NA NA NA NA NA NA NA NA NA ...
##   .. ..$ lowBKN: num [1:8818] NA NA NA NA NA NA NA NA NA NA ...
##   .. ..$ lowSCT: num [1:8818] NA NA NA NA NA NA NA NA NA NA ...
##   .. ..$ lowFEW: num [1:8818] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ minCeilingLevel: num [1:8818] 1e+06 1e+06 1e+06 1e+06 1e+06 ...
##   ..$ minCloudLevel  : num [1:8818] 1e+06 1e+06 1e+06 1e+06 1e+06 ...

Example #25: More Generic Downloading the Data by Function

The download function can be more generic, with just a few key arguments passed to it. Those arguments can then be derived in a helper function, with the generic function called from inside that.

Example code includes:

genericGetASOSData <- function(fileLoc, 
                               stationID,
                               startDate, 
                               endDate,
                               downloadMethod="curl", 
                               baseURL="https://mesonet.agron.iastate.edu/cgi-bin/request/asos.py?", 
                               dataFields="all", 
                               dataTZ="Etc%2FUTC", 
                               dataFormat="onlycomma", 
                               dataLatLon="no", 
                               dataMissing="M", 
                               dataTrace="T", 
                               dataDirect="no", 
                               dataType=2
                               ) {
    
    # Get the year, day, and hour of the key dates
    y1 <- lubridate::year(startDate)
    m1 <- lubridate::month(startDate)
    d1 <- lubridate::day(startDate)
    
    y2 <- lubridate::year(endDate)
    m2 <- lubridate::month(endDate)
    d2 <- lubridate::day(endDate)

    # Mimic the string shown below
    # https://mesonet.agron.iastate.edu/cgi-bin/request/asos.py?station=LAS&data=all&year1=2015&month1=12&day1=31&year2=2017&month2=1&day2=2&tz=Etc%2FUTC&format=onlycomma&latlon=no&missing=M&trace=T&direct=no&report_type=2
    
    useURL <- paste0(baseURL, "station=", stationID)  # add the desired station
    useURL <- paste0(useURL, "&data=", dataFields)  # default is "all
    useURL <- paste0(useURL, "&year1=", y1, "&month1=", m1, "&day1=", d1)  # Specify start ymd
    useURL <- paste0(useURL, "&year2=", y2, "&month2=", m2, "&day2=", d2)  # Specify end ymd
    useURL <- paste0(useURL, "&tz=", dataTZ)  # time zone (default UTC)
    useURL <- paste0(useURL, "&format=", dataFormat)  # file format (default CSV)
    useURL <- paste0(useURL, "&latlon=", dataLatLon)  # Whether to include lat-lon (default no)
    useURL <- paste0(useURL, "&missing=", dataMissing)  # How to handle missing data (default is 'M')
    useURL <- paste0(useURL, "&trace=", dataTrace)  # How to handle trace data (default is 'T')
    useURL <- paste0(useURL, "&direct=", dataDirect)  # Whether to directly get the data (default is 'no')
    useURL <- paste0(useURL, "&report_type=", dataType)  # Whether to get just METAR (2, default)
    
    # Download the file
    cat("\nDownloading from:", useURL, "\nDownloading to:", fileLoc, "\n")
    download.file(useURL, destfile=fileLoc, method=downloadMethod)
        
    return(TRUE)
}

Which can then be called by a function that checks whether the file already exists:

getASOSStationTime <- function(stationID, 
                               startDate=NULL, 
                               endDate=NULL, 
                               analysisYears=NULL, 
                               fileLoc=NULL,
                               ovrWrite=FALSE,
                               ...) {
    
    # Get the relevant time period for the data
    if (is.null(analysisYears) & (is.null(startDate) | is.null(endDate))) {
        stop("Must provide either analysisYears or both of startDate and endDate")
    }
    if (!is.null(startDate) & !is.null(endDate) & !is.null(analysisYears)) {
        stop("Should specify EITHER both of startDate and endDate OR analysisYears BUT NOT both")
    }
    if (is.null(startDate)) {
        startDate <- ISOdate(min(analysisYears)-1, 12, 31, hour=0)
        endDate <- ISOdate(max(analysisYears)+1, 1, 2, hour=0)
    }
    
    # Create the file name
    if (!is.null(analysisYears)) {
        if (length(analysisYears) == 1) { timeDesc <- analysisYears }
        else { timeDesc <- paste0(min(analysisYears), "-", max(analysisYears)) }
    } else {
        timeDesc <- paste0(lubridate::year(startDate), 
                           str_pad(lubridate::month(startDate), 2, pad="0"),
                           str_pad(lubridate::day(startDate), 2, pad="0"), 
                           "-", 
                           lubridate::year(endDate), 
                           str_pad(lubridate::month(endDate), 2, pad="0"), 
                           str_pad(lubridate::day(endDate), 2, pad="0")
                           )
    }
    
    if (is.null(fileLoc)) {
        fileLoc <- paste0("./RInputFiles/", "metar_k", str_to_lower(stationID), "_", timeDesc, ".txt")
    }
    
    cat("\nData for station", stationID, "from", as.character(startDate), "to", 
        as.character(endDate), "will download to", fileLoc, "\n"
        )
    
    if (file.exists(fileLoc) & !ovrWrite) {
        stop("File already exists, aborting")
    }
    
    genericGetASOSData(fileLoc=fileLoc, stationID=stationID, startDate=startDate, endDate=endDate, ...)
    
}

And the files can then be run (cached to avoid multiple hits to the Iowa State server):

# Get data for EWR for 2016
getASOSStationTime(stationID="EWR", analysisYears=2016)
## 
## Data for station EWR from 2015-12-31 to 2017-01-02 will download to ./RInputFiles/metar_kewr_2016.txt 
## 
## Downloading from: https://mesonet.agron.iastate.edu/cgi-bin/request/asos.py?station=EWR&data=all&year1=2015&month1=12&day1=31&year2=2017&month2=1&day2=2&tz=Etc%2FUTC&format=onlycomma&latlon=no&missing=M&trace=T&direct=no&report_type=2 
## Downloading to: ./RInputFiles/metar_kewr_2016.txt
## [1] TRUE
# Get data for ATL for 2016-2018
getASOSStationTime(stationID="ATL", analysisYears=2016:2018)
## 
## Data for station ATL from 2015-12-31 to 2019-01-02 will download to ./RInputFiles/metar_katl_2016-2018.txt 
## 
## Downloading from: https://mesonet.agron.iastate.edu/cgi-bin/request/asos.py?station=ATL&data=all&year1=2015&month1=12&day1=31&year2=2019&month2=1&day2=2&tz=Etc%2FUTC&format=onlycomma&latlon=no&missing=M&trace=T&direct=no&report_type=2 
## Downloading to: ./RInputFiles/metar_katl_2016-2018.txt
## [1] TRUE
# Get data for DFW for 2016-03-31 to 2017-03-01
getASOSStationTime(stationID="DFW", 
                   startDate=ISOdate(2016, 03, 31, hour=0), 
                   endDate=ISOdate(2017, 3, 1, hour=0)
                   )
## 
## Data for station DFW from 2016-03-31 to 2017-03-01 will download to ./RInputFiles/metar_kdfw_20160331-20170301.txt 
## 
## Downloading from: https://mesonet.agron.iastate.edu/cgi-bin/request/asos.py?station=DFW&data=all&year1=2016&month1=3&day1=31&year2=2017&month2=3&day2=1&tz=Etc%2FUTC&format=onlycomma&latlon=no&missing=M&trace=T&direct=no&report_type=2 
## Downloading to: ./RInputFiles/metar_kdfw_20160331-20170301.txt
## [1] TRUE

Example #26: Comparative Wind Directions by Location

The processed data files can be compared, with wind directions being one of the comparison sets.

Example code includes:

# Extract the wind direction data from a processed METAR file
getWindDirGroup <- function(keyList, src) {
    
    # Use the fullMETAR data and extract WindDir, WindSpeed, month
    windPlotData <- get(keyList)[["fullMETAR"]] %>%
        select(WindDir, WindSpeed, month) %>%
        mutate(windDirGroup=factor(case_when(WindSpeed==0 ~ "No Wind", 
                                             WindDir=="VRB" ~ "Variable", 
                                             WindDir %in% c("030", "040", "050", "060") ~ "NE", 
                                             WindDir %in% c("070", "080", "090", "100", "110") ~ "E", 
                                             WindDir %in% c("120", "130", "140", "150") ~ "SE", 
                                             WindDir %in% c("160", "170", "180", "190", "200") ~ "S", 
                                             WindDir %in% c("210", "220", "230", "240") ~ "SW", 
                                             WindDir %in% c("250", "260", "270", "280", "290") ~ "W", 
                                             WindDir %in% c("300", "310", "320", "330") ~ "NW", 
                                             WindDir %in% c("340", "350", "360", "010", "020") ~ "N", 
                                            TRUE ~ "Error"
                                            ) , levels=c("No Wind", "Variable", "Error", 
                                                         "N", "NE", "E", "SE", "S", "SW", "W", "NW"
                                                         )
                                   )
               )
    
    # Rempve the errors and calculate percentages by month for the remainder
    processedWindData <- windPlotData %>%
        filter(windDirGroup != "Error") %>%
        group_by(month, windDirGroup) %>%
        summarize(n=n()) %>%
        ungroup() %>%
        group_by(month) %>%
        mutate(pct=n/sum(n)) %>%
        ungroup() %>%
        mutate(src=src)
    
    processedWindData

}


# Consolidate and plot wind data
consolidatePlotWind <- function(files, names) {

    consFun <- function(x, y) { getWindDirGroup(keyList=x, src=y) }
    boundByRows <- map2_dfr(.x=files, .y=names, .f=consFun)

    p <- boundByRows %>%
        ggplot(aes(x=month, y=pct, color=src)) + 
        geom_line(aes(group=src)) + 
        facet_wrap(~windDirGroup) + 
        labs(title="Wind Direction Frequency by Month", x="Month", y="Frequency of Wind Observations") +
        theme(axis.text.x=element_text(angle=90))
    print(p)
    
    boundByRows
}

# Run for 2016 with Lincoln, NE; Las Vegas, NV; and Chicago, IL
cpWind <- consolidatePlotWind(files=c("klnk2016METAR", "klas2016METAR", "kord2016METAR"), 
                              names=c("Lincoln, NE (2016)", "Las Vegas, NV (2016)", "Chicago, IL (2016)")
                              )

cpWind
## # A tibble: 359 x 5
##    month windDirGroup     n     pct src               
##    <fct> <fct>        <int>   <dbl> <chr>             
##  1 Jan   No Wind         76 0.0992  Lincoln, NE (2016)
##  2 Jan   Variable         3 0.00392 Lincoln, NE (2016)
##  3 Jan   N              179 0.234   Lincoln, NE (2016)
##  4 Jan   NE              31 0.0405  Lincoln, NE (2016)
##  5 Jan   E               30 0.0392  Lincoln, NE (2016)
##  6 Jan   SE              63 0.0822  Lincoln, NE (2016)
##  7 Jan   S              174 0.227   Lincoln, NE (2016)
##  8 Jan   SW              57 0.0744  Lincoln, NE (2016)
##  9 Jan   W               65 0.0849  Lincoln, NE (2016)
## 10 Jan   NW              88 0.115   Lincoln, NE (2016)
## # ... with 349 more rows

Example #27: Extracting Precipitation Information from METAR

METAR data include descriptions of the precipitation occuring at any given time. Two of the most common precipitation forms are rain (RA) and SN(). These can occur together, denoted as RASN or SNRA in the METAR.

Further, the precipitation type can be classified using a prefix as light (-), moderate (no prefix), or heavy (+). So, RA would be moderate rain, -SNRA would be a light snow-rain mix, +RA would be heavy rain.

Additionally, the timing of the precipitation event is captured in the remarks using B (begin) and E (end). So, an hourly METAR of RAB20E35B50 would mean rain started at 20 past the hour, ended at 35 past the hour, and began again at 50 past the hour. Since METAR are often taken just before the top of the hour, a four-digit time is used if it is in the ‘previous’ hour; for example, RAB1959E36 in the 2053Z METAR.

We can use the remarks to see when it was raining in the given METAR.

Example code includes:

# Extract the METAR and the date-time from a processed list
procMET <- klas2016METAR$fullMETAR %>% 
    select(origMETAR, dtime)

# Check whether there are comments for either rain regins (RAB) or rain ends (RAE) and pull all the data
procMET <- procMET %>% 
    mutate(isRain=grepl("RA[B|E]", origMETAR), 
           rainData=str_extract(origMETAR, pattern="(RA[B|E]\\d+[0-9BE]*)"), 
           nBegin=pmax(0, str_count(rainData, "B"), na.rm=TRUE), 
           nEnd=pmax(0, str_count(rainData, "E"), na.rm=TRUE), 
           dateUTC=lubridate::date(dtime), 
           hourUTC=lubridate::hour(dtime)
           )
str(procMET)
## Classes 'tbl_df', 'tbl' and 'data.frame':    8818 obs. of  8 variables:
##  $ origMETAR: chr  "KLAS 310056Z 10003KT 10SM CLR 08/M08 A3009 RMK AO2 SLP196 T00831078" "KLAS 310156Z 01005KT 10SM CLR 07/M08 A3011 RMK AO2 SLP204 T00671083" "KLAS 310256Z 01005KT 10SM CLR 06/M09 A3013 RMK AO2 SLP210 T00611089 53011" "KLAS 310356Z 34005KT 10SM CLR 05/M09 A3014 RMK AO2 SLP215 T00501089" ...
##  $ dtime    : POSIXct, format: "2015-12-31 00:56:00" "2015-12-31 01:56:00" ...
##  $ isRain   : logi  FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ rainData : chr  NA NA NA NA ...
##  $ nBegin   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ nEnd     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ dateUTC  : Date, format: "2015-12-31" "2015-12-31" ...
##  $ hourUTC  : int  0 1 2 3 4 5 6 7 8 9 ...
# Check the counts of rain beginning and rain ending
procMET %>%
    count(isRain, nBegin, nEnd)
## # A tibble: 7 x 4
##   isRain nBegin  nEnd     n
##   <lgl>   <dbl> <dbl> <int>
## 1 FALSE       0     0  8643
## 2 TRUE        0     1    56
## 3 TRUE        1     0    51
## 4 TRUE        1     1    59
## 5 TRUE        1     2     2
## 6 TRUE        2     1     6
## 7 TRUE        2     2     1
# Extract all the times when rain began (can be multiple per METAR)
rainMET <- procMET %>%
    select(dateUTC, hourUTC, rainData, nBegin, nEnd)

# Confirm the rainMET is unique by dateUTC and hourUTC
dupAns <- rainMET %>%
    select(dateUTC, hourUTC) %>%
    duplicated() %>%
    any()
cat("\nAre there any problems with duplicated keys?", dupAns, "\n")
## 
## Are there any problems with duplicated keys? FALSE
# Extract the matrix of rain beginning data
rainBegin <- rainMET %>% 
    pull(rainData) %>% 
    str_extract_all("B\\d+", simplify=TRUE) %>%
    as.data.frame(stringsAsFactors=FALSE)

# Extract the matrix of rain ending data
rainEnd <- rainMET %>% 
    pull(rainData) %>% 
    str_extract_all("E\\d+", simplify=TRUE) %>%
    as.data.frame(stringsAsFactors=FALSE)

# Convert the rain begins data to the hour and minute associated to the UTC
extractTime <- function(x, var, sym="B") {
    if (is.na(x[var]) | x[var]=="") {
        utcUse <- NA
    }
    else {
        utcUse <- str_replace(x[var], sym, "")
        if (str_length(utcUse)==4) {
            utcUse <- paste0(x["dateUTC"], " ", utcUse)
        } else if (str_length(utcUse)==2) {
            utcUse <- paste0(x["dateUTC"], " ", x["hourChar"], utcUse)
        } else {
            cat("\nCannot parse data: ", x, "\n", x[var], "\n", var, sym, utcUse)
            stop()
        }
    }
}

# Extract the begin times from V1
beginTime1 <- rainBegin %>%
    cbind(rainMET[, c("dateUTC", "hourUTC")]) %>%
    mutate(hourChar=str_pad(str_trim(as.character(hourUTC)), width=2, pad="0")) %>%
    apply(1, FUN=extractTime, var="V1", sym="B")

# Extract the begin times from V2
beginTime2 <- rainBegin %>%
    cbind(rainMET[, c("dateUTC", "hourUTC")]) %>%
    mutate(hourChar=str_pad(str_trim(as.character(hourUTC)), width=2, pad="0")) %>%
    apply(1, FUN=extractTime, var="V2", sym="B")

# Extract the end times from V1
endTime1 <- rainEnd %>%
    cbind(rainMET[, c("dateUTC", "hourUTC")]) %>%
    mutate(hourChar=str_pad(str_trim(as.character(hourUTC)), width=2, pad="0")) %>%
    apply(1, FUN=extractTime, var="V1", sym="E")

# Extract the end times from V2
endTime2 <- rainEnd %>%
    cbind(rainMET[, c("dateUTC", "hourUTC")]) %>%
    mutate(hourChar=str_pad(str_trim(as.character(hourUTC)), width=2, pad="0")) %>%
    apply(1, FUN=extractTime, var="V2", sym="E")

allBegins <- 
    c(beginTime1[!is.na(beginTime1)], beginTime2[!is.na(beginTime2)]) %>%
    lubridate::ymd_hm() %>%
    sort()

allEnds <- 
    c(endTime1[!is.na(endTime1)], endTime2[!is.na(endTime2)]) %>%
    lubridate::ymd_hm() %>%
    sort()

# Check the each ending is after its associated beginning
endMinusBeg <- allEnds - allBegins
## Warning in unclass(time1) - unclass(time2): longer object length is not a
## multiple of shorter object length
# Rain is listed as ending at 2016-05-01 00:42 and 2016-04-30 17:38 both for start 2016-04-30 11:20
endMinusBeg <- allEnds[allEnds != lubridate::ymd_hm("2016-05-01 0042")] - allBegins

# Plot the rain durations in minutes
data.frame(minutesRain=as.numeric(endMinusBeg), month=lubridate::month(allBegins)) %>%
    ggplot(aes(x=minutesRain)) +
    geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Plot the rain totals (in hours) by month
data.frame(minutesRain=as.numeric(endMinusBeg), month=lubridate::month(allBegins)) %>%
    group_by(month) %>%
    summarize(minutesRain=sum(minutesRain), nRain=n()) %>%
    ggplot(aes(x=factor(month.abb[month], levels=month.abb[1:12]), y=minutesRain/60)) +
    geom_col() + 
    labs(title="Las Vegas, NV Rainfall (hours) in 2016", y="Hours of Rain", x="Month") + 
    geom_text(aes(y=2+minutesRain/60, label=round(minutesRain/60, 1)))

Example #28: Function for Extracting Precipitation Information from METAR

The precipitation extraction process can be converted to functions. In addition to being more modular, several additional features can be included:

  • Allow to search for other precipitation types, specifically snow (SN) or drizzle (DZ)
  • Correct for the isue that a time of 2016-01-04 2359Z is placed in the 206-01-05 bucket by the code above
  • Identify areas where the intervals are not sensible or raise significant questions (negative durations, durations longer than 24 hours)
  • Check that intervals overlap with the time periods in the METAR that show that precipitation event

Example code includes:

  • Extract the precipitation data from a processed METAR file
  • Find the begin and end times from this file
  • Check for intervals of questionable length and correct as needed
# Extract the precipitation data from a processed METAR file
extractPrecipData <- function(processedFile, pType="RA") {

    # Extract the METAR and the date-time from a processed list
    procMET <- processedFile[["fullMETAR"]] %>% 
        select(origMETAR, dtime)

    # Check whether there are comments for the desired pType either beginning or ending
    keyPattern <- paste0("(", pType, "[B|E]\\d+[0-9BE]*)")
    cat("\nRegex search code is:", keyPattern, "\n\n")
    
    procMET <- procMET %>% 
        mutate(precipData=str_extract(origMETAR, pattern=keyPattern), 
               isPrecip=grepl(paste0(pType, "[B|E]"), origMETAR), 
               nBegin=pmax(0, str_count(precipData, "B"), na.rm=TRUE), 
               nEnd=pmax(0, str_count(precipData, "E"), na.rm=TRUE), 
               dateUTC=lubridate::date(dtime), 
               hourUTC=lubridate::hour(dtime)
               )
    str(procMET)
    cat("\n")

    # Check the counts of precipitation beginning and rain ending
    procMET %>%
        count(isPrecip, nBegin, nEnd) %>%
        print()

    # Check that the file is unique by time
    dupAns <- procMET %>%
        select(dateUTC, hourUTC) %>%
        duplicated %>%
        any()

    cat("\nAre there any problems with duplicated keys?", dupAns, "\n")
    
    procMET
}

testFileProc <-extractPrecipData(klas2016METAR, pType="RA")
## 
## Regex search code is: (RA[B|E]\d+[0-9BE]*) 
## 
## Classes 'tbl_df', 'tbl' and 'data.frame':    8818 obs. of  8 variables:
##  $ origMETAR : chr  "KLAS 310056Z 10003KT 10SM CLR 08/M08 A3009 RMK AO2 SLP196 T00831078" "KLAS 310156Z 01005KT 10SM CLR 07/M08 A3011 RMK AO2 SLP204 T00671083" "KLAS 310256Z 01005KT 10SM CLR 06/M09 A3013 RMK AO2 SLP210 T00611089 53011" "KLAS 310356Z 34005KT 10SM CLR 05/M09 A3014 RMK AO2 SLP215 T00501089" ...
##  $ dtime     : POSIXct, format: "2015-12-31 00:56:00" "2015-12-31 01:56:00" ...
##  $ precipData: chr  NA NA NA NA ...
##  $ isPrecip  : logi  FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ nBegin    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ nEnd      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ dateUTC   : Date, format: "2015-12-31" "2015-12-31" ...
##  $ hourUTC   : int  0 1 2 3 4 5 6 7 8 9 ...
## 
## # A tibble: 7 x 4
##   isPrecip nBegin  nEnd     n
##   <lgl>     <dbl> <dbl> <int>
## 1 FALSE         0     0  8643
## 2 TRUE          0     1    56
## 3 TRUE          1     0    51
## 4 TRUE          1     1    59
## 5 TRUE          1     2     2
## 6 TRUE          2     1     6
## 7 TRUE          2     2     1
## 
## Are there any problems with duplicated keys? FALSE

Next, a function for extrating beginning and ending times can be written:

# Helper function to extract the beginning and ending times using str_extract_all
getBeginEndTimeMatrix <- function(file, pullVar="precipData", pState="B") {
    file %>%
        pull(pullVar) %>%
        str_extract_all(paste0(pState, "\\d+"), simplify=TRUE) %>%
        as.data.frame(stringsAsFactors=FALSE)    
}

testBegin <- getBeginEndTimeMatrix(testFileProc, pState="B")
testEnd <- getBeginEndTimeMatrix(testFileProc, pState="E")

if (ncol(testBegin) != 2 | ncol(testEnd) != 2) { 
    stop("Hard-coded for 2 columns each of begin/end- Fix") 
}

# Helper function to convert begin and end time using date and hour
extractTime <- function(x, var, sym="B") {
    if (is.na(x[var]) | x[var]=="") {
        utcReturn <- NA
    }
    else {
        utcUse <- str_replace(x[var], sym, "")
        if (str_length(utcUse)==4) {
            utcReturn <- paste0(x["dateUTC"], " ", utcUse)
            # If a 4-digit time starts with 23 and is in the 0Z METAR, it is part of the previous day
            if(str_sub(utcUse, 1, 2)=="23" & as.numeric(x["hourChar"])==0) {
                utcReturn <- paste0(as.Date(x["dateUTC"]) - 1, " ", utcUse)
            }
        } else if (str_length(utcUse)==2) {
            utcReturn <- paste0(x["dateUTC"], " ", x["hourChar"], utcUse)
        } else {
            cat("\nCannot parse data: ", x, "\n", x[var], "\n", var, sym, utcUse)
            stop()
        }
    }
    utcReturn
}

getBeginEndTimeVector <- function(timeExtractFile, origFullFile, extractVar, extractSym) {
    timeExtractFile %>%
        cbind(origFullFile[, c("dateUTC", "hourUTC")]) %>%
        mutate(hourChar=str_pad(str_trim(as.character(hourUTC)), width=2, pad="0")) %>%
        apply(1, FUN=extractTime, var=extractVar, sym=extractSym)
}

testBT1 <- getBeginEndTimeVector(testBegin, testFileProc, extractVar="V1", extractSym="B")
testBT2 <- getBeginEndTimeVector(testBegin, testFileProc, extractVar="V2", extractSym="B")
testET1 <- getBeginEndTimeVector(testEnd, testFileProc, extractVar="V1", extractSym="E")
testET2 <- getBeginEndTimeVector(testEnd, testFileProc, extractVar="V2", extractSym="E")

testAllBegins <- 
    c(testBT1[!is.na(testBT1)], testBT2[!is.na(testBT2)]) %>%
    lubridate::ymd_hm() %>%
    sort()

testAllEnds <- 
    c(testET1[!is.na(testET1)], testET2[!is.na(testET2)]) %>%
    lubridate::ymd_hm() %>%
    sort()

Next, create the time intervals vector, with the capability to change the start state, check the intervals, exclude times as needed, and compare to raw METAR.

Example code includes:

# Function to create the time intervals data
createPrecipInterval <- function(endVector, beginVector, endExclude=c(), beginExclude=c(), 
                                 sState=FALSE, nMinPrint=1, maxProb=1000, nMaxPrint=1
                                 ) {
    
    # If the starting state is one of precipitation, allow it to 'burn in' by deleting the first end time
    if(sState) { endVector <- endVector[2:length(endVector)] }
    
    # Create the interval data
    endsUse <- endVector[!(endVector %in% lubridate::ymd_hm(endExclude))]
    beginsUse <- beginVector[!(beginVector %in% lubridate::ymd_hm(beginExclude))]
    intervalData <- endsUse - beginsUse
    
    # Show a summary of the interval data
    print(summary(as.numeric(intervalData)))
    
    # If there are any non-positive intervals, print the data causing the first of them
    if (min(as.numeric(intervalData)) <= 0) {
        cat("\nProblem Detected - Intervals are not positive.  Data to help investigate\n")
        posns <- which(as.numeric(intervalData) <= 0)
        posns <- max(1, posns[1]-5):min(length(beginsUse), length(endsUse), posns[nMinPrint]+5)
        cat("\nVector of Begins\n")
        print(lubridate::as_datetime(beginsUse[posns]))
        cat("\nVector of Ends\n")
        print(lubridate::as_datetime(endsUse[posns]))
        cat("\n")
    }
    
    # If there are any very long positive intervals, print the data causing the first five of them
    if (max(as.numeric(intervalData)) >= maxProb) {
        cat("\nPotential problem Detected - very long.  Data to help investigate\n")
        posns <- which(as.numeric(intervalData) >= maxProb)
        cat("\nPositions with problems are:", posns)
        posns <- max(1, posns[1]-5):min(length(beginsUse), length(endsUse), posns[min(length(posns), nMaxPrint)]+5)
        cat("\nVector of Begins\n")
        print(lubridate::as_datetime(beginsUse[posns]))
        cat("\nVector of Ends\n")
        print(lubridate::as_datetime(endsUse[posns]))
        cat("\n")
    }
    
    # Return the interval data
    intervalData
}

# Run a full pass for Las Vegas Rain
testIntervals <- createPrecipInterval(testAllEnds, testAllBegins)
## Warning in unclass(time1) - unclass(time2): longer object length is not a
## multiple of shorter object length
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## -41221.0   -454.5    -16.0   1382.7     30.0 520731.0 
## 
## Problem Detected - Intervals are not positive.  Data to help investigate
## 
## Vector of Begins
##  [1] "2016-04-28 21:33:00 UTC" "2016-04-29 00:34:00 UTC"
##  [3] "2016-04-29 03:27:00 UTC" "2016-04-30 08:51:00 UTC"
##  [5] "2016-04-30 11:20:00 UTC" "2016-05-01 04:16:00 UTC"
##  [7] "2016-05-01 04:55:00 UTC" "2016-05-06 14:35:00 UTC"
##  [9] "2016-05-06 19:20:00 UTC" "2016-05-07 07:24:00 UTC"
## [11] "2016-05-07 22:49:00 UTC"
## 
## Vector of Ends
##  [1] "2016-04-28 21:42:00 UTC" "2016-04-29 00:58:00 UTC"
##  [3] "2016-04-29 03:51:00 UTC" "2016-04-30 09:53:00 UTC"
##  [5] "2016-04-30 17:38:00 UTC" "2016-05-01 00:42:00 UTC"
##  [7] "2016-05-01 04:31:00 UTC" "2016-05-01 05:10:00 UTC"
##  [9] "2016-05-06 15:12:00 UTC" "2016-05-06 19:30:00 UTC"
## [11] "2016-05-07 07:45:00 UTC"
## 
## 
## Potential problem Detected - very long.  Data to help investigate
## 
## Positions with problems are: 127
## Vector of Begins
## [1] "2016-12-25 00:16:00 UTC" "2016-12-30 22:09:00 UTC"
## [3] "2016-12-30 23:44:00 UTC" "2016-12-31 03:59:00 UTC"
## [5] "2016-12-31 05:45:00 UTC"
## 
## Vector of Ends
## [1] "2016-12-24 14:06:00 UTC" "2016-12-25 00:32:00 UTC"
## [3] "2016-12-30 22:35:00 UTC" "2016-12-31 00:04:00 UTC"
## [5] "2016-12-31 04:12:00 UTC"
testIntervals <- createPrecipInterval(testAllEnds, testAllBegins, endExclude=c("2016-05-01 0042"))
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    6.00   16.00   29.00   52.15   57.75  502.00
createPrecipIntervalPlots <- function(intervalData, beginsData, titleText, yAxisText, 
                                      beginExclude=c(), returnPlotsAndData=FALSE
                                      ) {

    # Exclude any data from begins as needed
    beginsData <- beginsData[!(beginsData %in% lubridate::ymd_hm(beginExclude))]
    
    # Create a plotting data frame
    histFrame <- data.frame(minutesPrecip=as.numeric(intervalData), 
                            month=lubridate::month(beginsData), 
                            rainDate=lubridate::date(beginsData)
                            ) %>%
        mutate(hoursPrecip=minutesPrecip/60)
    
    # Plot the precipitation durations in hours
    p1 <- histFrame %>%
        ggplot(aes(x=hoursPrecip)) +
        geom_histogram() + 
        labs(title=titleText, x=yAxisText, 
             subtitle="Distribution of hours per unique precipitation event"
             )
    print(p1)

    # Plot the precipitation by day in hours
    p2 <- histFrame %>%
        group_by(rainDate) %>%
        summarize(hoursPrecip=sum(hoursPrecip)) %>%
        ggplot(aes(x=hoursPrecip)) +
        geom_histogram() + 
        labs(title=titleText, x=yAxisText, 
             subtitle="Distribution of hours per day of precipitation (on days when 1+ minutes occurred)"
             )
    print(p2)
    
    # Plot the rain totals (in hours) by month
    # Create a data frame of all months and merge in precipitation data as an where available (0 otherwise)
    monthFrame <- histFrame %>%
        group_by(month) %>%
        summarize(minutesPrecip=sum(minutesPrecip), hoursPrecip=sum(hoursPrecip), nPrecip=n()) %>%
        right_join(data.frame(month=1:12, monthName=month.abb[1:12]), by="month") %>%
        tidyr::replace_na(list(minutesPrecip=0, hoursPrecip=0, nPrecip=0))
    # print(monthFrame)
    
    p3 <- monthFrame %>%
        ggplot(aes(x=factor(monthName, levels=month.abb[1:12]), y=hoursPrecip)) +
        geom_col() + 
        labs(title=titleText, y=yAxisText, x="") + 
        geom_text(aes(y=2 + hoursPrecip, label=round(hoursPrecip, 1)))
    print(p3)
    
    if (returnPlotsAndData) {
        list(histFrame=histFrame, monthFrame=monthFrame, p1=p1, p2=p2, p3=p3)
    } else {
        NULL
    }
}

# Plots for Las Vegas Rain
createPrecipIntervalPlots(testIntervals, testAllBegins, 
                          titleText="Las Vegas, NV Rainfall (hours) in 2016", yAxisText="Hours of Rain"
                          )
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## NULL

Example #29: Combining Functions and Extending to Other Locales and Precipitation Types

The functions can be combined, and the approach then extended to other locales and precipitation types.

Example code includes:

# Combining all the functions in one place
runFullPrecipExtraction <- function(df, 
                                    pType, 
                                    titleText, 
                                    yAxisText,
                                    endExclude=c(), 
                                    beginExclude=c(), 
                                    maxProb=1000, 
                                    sState=FALSE, 
                                    makePlots=TRUE, 
                                    returnPlotsAndData=FALSE
                                    ) {
    
    # Extract the precipitation data from a specified processed METAR file
    testFileProc <-extractPrecipData(df, pType=pType)
    
    # Confirm that the two-column specification is met (should relax hard-coding on this)
    testBegin <- getBeginEndTimeMatrix(testFileProc, pState="B")
    testEnd <- getBeginEndTimeMatrix(testFileProc, pState="E")

    if (ncol(testBegin) != 2 | ncol(testEnd) != 2) { 
        stop("Hard-coded for 2 columns each of begin/end- Fix")
    }
    
    # Extract the beginning and ending information
    testBT1 <- getBeginEndTimeVector(testBegin, testFileProc, extractVar="V1", extractSym="B")
    testBT2 <- getBeginEndTimeVector(testBegin, testFileProc, extractVar="V2", extractSym="B")
    testET1 <- getBeginEndTimeVector(testEnd, testFileProc, extractVar="V1", extractSym="E")
    testET2 <- getBeginEndTimeVector(testEnd, testFileProc, extractVar="V2", extractSym="E")

    testAllBegins <- 
        c(testBT1[!is.na(testBT1)], testBT2[!is.na(testBT2)]) %>%
        lubridate::ymd_hm() %>%
        sort()

    testAllEnds <- 
        c(testET1[!is.na(testET1)], testET2[!is.na(testET2)]) %>%
        lubridate::ymd_hm() %>%
        sort()
    
    # Create the intervals
    testIntervals <- createPrecipInterval(testAllEnds, testAllBegins, endExclude=endExclude, 
                                          beginExclude=beginExclude, maxProb=maxProb, sState=sState)
    
    # Create the precipitation plots
    plotOut <- NULL
    if (makePlots) {
        plotOut <- createPrecipIntervalPlots(testIntervals, testAllBegins, titleText=titleText, 
                                             yAxisText=yAxisText, beginExclude=beginExclude, 
                                             returnPlotsAndData=returnPlotsAndData
                                             )
    }
    
    if (!returnPlotsAndData) { plotOut <- NULL }
    
    # Return all of the key files, along with the parameters used
    keyParams <- list(fileName=deparse(substitute(df)), pType=pType, 
                      endExclude=endExclude, beginExclude=beginExclude, 
                      maxProb=maxProb, sState=sState
                      )
    list(keyParams=keyParams, 
         testFileProc=testFileProc, 
         testAllBegins=testAllBegins[!(testAllBegins %in% lubridate::ymd_hm(beginExclude))], 
         testAllEnds=testAllEnds[!(testAllEnds %in% lubridate::ymd_hm(endExclude))], 
         testIntervals=testIntervals,
         plotOut=plotOut
         )
}

The full function can then be run to replicate the Las Vegas, NV 2016 rain plots:

# Run for Las Vegas, NV 2016 rainfall
klasRain2016 <- runFullPrecipExtraction(klas2016METAR, 
                                        pType="RA", 
                                        titleText="Las Vegas, NV Rainfall (hours) in 2016", 
                                        yAxisText="Hours of Rain", 
                                        endExclude=c("2016-05-01 0042"),
                                        beginExclude=c(), 
                                        maxProb=1000, 
                                        sState=FALSE
                                        )
## 
## Regex search code is: (RA[B|E]\d+[0-9BE]*) 
## 
## Classes 'tbl_df', 'tbl' and 'data.frame':    8818 obs. of  8 variables:
##  $ origMETAR : chr  "KLAS 310056Z 10003KT 10SM CLR 08/M08 A3009 RMK AO2 SLP196 T00831078" "KLAS 310156Z 01005KT 10SM CLR 07/M08 A3011 RMK AO2 SLP204 T00671083" "KLAS 310256Z 01005KT 10SM CLR 06/M09 A3013 RMK AO2 SLP210 T00611089 53011" "KLAS 310356Z 34005KT 10SM CLR 05/M09 A3014 RMK AO2 SLP215 T00501089" ...
##  $ dtime     : POSIXct, format: "2015-12-31 00:56:00" "2015-12-31 01:56:00" ...
##  $ precipData: chr  NA NA NA NA ...
##  $ isPrecip  : logi  FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ nBegin    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ nEnd      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ dateUTC   : Date, format: "2015-12-31" "2015-12-31" ...
##  $ hourUTC   : int  0 1 2 3 4 5 6 7 8 9 ...
## 
## # A tibble: 7 x 4
##   isPrecip nBegin  nEnd     n
##   <lgl>     <dbl> <dbl> <int>
## 1 FALSE         0     0  8643
## 2 TRUE          0     1    56
## 3 TRUE          1     0    51
## 4 TRUE          1     1    59
## 5 TRUE          1     2     2
## 6 TRUE          2     1     6
## 7 TRUE          2     2     1
## 
## Are there any problems with duplicated keys? FALSE 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    6.00   16.00   29.00   52.15   57.75  502.00
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Confirm that results are the same as when run outside the function
identical(testIntervals, klasRain2016$testIntervals)
## [1] TRUE
identical(testFileProc, klasRain2016$testFileProc)
## [1] TRUE
identical(testAllBegins, klasRain2016$testAllBegins)
## [1] TRUE
identical(testAllEnds, klasRain2016$testAllEnds)
## [1] FALSE
# Show the key parameters used
klasRain2016$keyParams
## $fileName
## [1] "klas2016METAR"
## 
## $pType
## [1] "RA"
## 
## $endExclude
## [1] "2016-05-01 0042"
## 
## $beginExclude
## NULL
## 
## $maxProb
## [1] 1000
## 
## $sState
## [1] FALSE

The full functions can then be explored for Chicago, IL 2016 rain plots:

# Run for Chicago, IL 2016 rainfall - run with no plots while debugging begin/end exclude
kordRain2016 <- runFullPrecipExtraction(kord2016METAR, 
                                        pType="RA", 
                                        titleText="Chicago, IL Rainfall (hours) in 2016", 
                                        yAxisText="Hours of Rain", 
                                        endExclude=c(),
                                        beginExclude=c(), 
                                        maxProb=1440, 
                                        sState=FALSE, 
                                        makePlots=FALSE
                                        )
## 
## Regex search code is: (RA[B|E]\d+[0-9BE]*) 
## 
## Classes 'tbl_df', 'tbl' and 'data.frame':    8815 obs. of  8 variables:
##  $ origMETAR : chr  "KORD 310051Z 26008KT 8SM OVC016 M03/M06 A3021 RMK AO2 SLP239 T10281056" "KORD 310151Z 26008KT 9SM OVC016 M03/M06 A3021 RMK AO2 SLP240 T10281056" "KORD 310251Z 25008KT 10SM OVC015 M03/M06 A3021 RMK AO2 SLP239 T10281061 50004" "KORD 310351Z 24009KT 10SM OVC019 M03/M06 A3022 RMK AO2 SLP242 T10281061" ...
##  $ dtime     : POSIXct, format: "2015-12-31 00:51:00" "2015-12-31 01:51:00" ...
##  $ precipData: chr  NA NA NA NA ...
##  $ isPrecip  : logi  FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ nBegin    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ nEnd      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ dateUTC   : Date, format: "2015-12-31" "2015-12-31" ...
##  $ hourUTC   : int  0 1 2 3 4 5 6 7 8 9 ...
## 
## # A tibble: 7 x 4
##   isPrecip nBegin  nEnd     n
##   <lgl>     <dbl> <dbl> <int>
## 1 FALSE         0     0  8256
## 2 TRUE          0     1   179
## 3 TRUE          1     0   177
## 4 TRUE          1     1   168
## 5 TRUE          1     2    14
## 6 TRUE          2     1    14
## 7 TRUE          2     2     7
## 
## Are there any problems with duplicated keys? FALSE
## Warning in unclass(time1) - unclass(time2): longer object length is not a
## multiple of shorter object length
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## -31587.0  -2464.5   -355.0    703.3   -121.0 509393.0 
## 
## Problem Detected - Intervals are not positive.  Data to help investigate
## 
## Vector of Begins
##  [1] "2016-02-02 23:56:00 UTC" "2016-02-03 18:13:00 UTC"
##  [3] "2016-02-03 22:40:00 UTC" "2016-02-29 01:17:00 UTC"
##  [5] "2016-03-01 02:48:00 UTC" "2016-03-01 04:36:00 UTC"
##  [7] "2016-03-01 09:44:00 UTC" "2016-03-01 11:01:00 UTC"
##  [9] "2016-03-07 16:43:00 UTC" "2016-03-08 10:46:00 UTC"
## [11] "2016-03-09 05:56:00 UTC"
## 
## Vector of Ends
##  [1] "2016-02-03 01:25:00 UTC" "2016-02-03 18:43:00 UTC"
##  [3] "2016-02-03 23:00:00 UTC" "2016-02-29 02:39:00 UTC"
##  [5] "2016-03-01 03:11:00 UTC" "2016-03-01 04:14:00 UTC"
##  [7] "2016-03-01 05:30:00 UTC" "2016-03-01 10:17:00 UTC"
##  [9] "2016-03-01 11:23:00 UTC" "2016-03-07 17:49:00 UTC"
## [11] "2016-03-08 10:55:00 UTC"
## 
## 
## Potential problem Detected - very long.  Data to help investigate
## 
## Positions with problems are: 402 403
## Vector of Begins
## [1] "2016-12-25 22:11:00 UTC" "2016-12-26 04:56:00 UTC"
## [3] "2016-12-26 08:23:00 UTC" "2016-12-26 08:57:00 UTC"
## [5] "2016-12-26 12:46:00 UTC"
## 
## Vector of Ends
## [1] "2016-12-23 23:32:00 UTC" "2016-12-24 03:43:00 UTC"
## [3] "2016-12-25 23:40:00 UTC" "2016-12-26 05:20:00 UTC"
## [5] "2016-12-26 08:37:00 UTC"
# Exclude the first problem end time - 2016-03-01 04:14:00 UTC
kordRain2016 <- runFullPrecipExtraction(kord2016METAR, 
                                        pType="RA", 
                                        titleText="Chicago, IL Rainfall (hours) in 2016", 
                                        yAxisText="Hours of Rain", 
                                        endExclude=c("2016-03-01 0414"),
                                        beginExclude=c(), 
                                        maxProb=1440, 
                                        sState=FALSE, 
                                        makePlots=FALSE
                                        )
## 
## Regex search code is: (RA[B|E]\d+[0-9BE]*) 
## 
## Classes 'tbl_df', 'tbl' and 'data.frame':    8815 obs. of  8 variables:
##  $ origMETAR : chr  "KORD 310051Z 26008KT 8SM OVC016 M03/M06 A3021 RMK AO2 SLP239 T10281056" "KORD 310151Z 26008KT 9SM OVC016 M03/M06 A3021 RMK AO2 SLP240 T10281056" "KORD 310251Z 25008KT 10SM OVC015 M03/M06 A3021 RMK AO2 SLP239 T10281061 50004" "KORD 310351Z 24009KT 10SM OVC019 M03/M06 A3022 RMK AO2 SLP242 T10281061" ...
##  $ dtime     : POSIXct, format: "2015-12-31 00:51:00" "2015-12-31 01:51:00" ...
##  $ precipData: chr  NA NA NA NA ...
##  $ isPrecip  : logi  FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ nBegin    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ nEnd      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ dateUTC   : Date, format: "2015-12-31" "2015-12-31" ...
##  $ hourUTC   : int  0 1 2 3 4 5 6 7 8 9 ...
## 
## # A tibble: 7 x 4
##   isPrecip nBegin  nEnd     n
##   <lgl>     <dbl> <dbl> <int>
## 1 FALSE         0     0  8256
## 2 TRUE          0     1   179
## 3 TRUE          1     0   177
## 4 TRUE          1     1   168
## 5 TRUE          1     2    14
## 6 TRUE          2     1    14
## 7 TRUE          2     2     7
## 
## Are there any problems with duplicated keys? FALSE
## Warning in unclass(time1) - unclass(time2): longer object length is not a
## multiple of shorter object length
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## -30696.0   -253.0    -42.0    515.3      5.0 509428.0 
## 
## Problem Detected - Intervals are not positive.  Data to help investigate
## 
## Vector of Begins
##  [1] "2016-03-24 03:38:00 UTC" "2016-03-24 05:49:00 UTC"
##  [3] "2016-03-24 07:38:00 UTC" "2016-03-24 11:32:00 UTC"
##  [5] "2016-03-24 14:41:00 UTC" "2016-03-27 19:59:00 UTC"
##  [7] "2016-03-30 13:42:00 UTC" "2016-03-30 16:34:00 UTC"
##  [9] "2016-03-30 21:13:00 UTC" "2016-03-30 23:27:00 UTC"
## [11] "2016-03-31 01:09:00 UTC"
## 
## Vector of Ends
##  [1] "2016-03-24 05:09:00 UTC" "2016-03-24 05:51:00 UTC"
##  [3] "2016-03-24 08:20:00 UTC" "2016-03-24 14:01:00 UTC"
##  [5] "2016-03-24 18:41:00 UTC" "2016-03-24 22:14:00 UTC"
##  [7] "2016-03-28 00:49:00 UTC" "2016-03-30 13:50:00 UTC"
##  [9] "2016-03-30 16:59:00 UTC" "2016-03-30 21:25:00 UTC"
## [11] "2016-03-31 00:20:00 UTC"
## 
## 
## Potential problem Detected - very long.  Data to help investigate
## 
## Positions with problems are: 402
## Vector of Begins
## [1] "2016-12-25 22:11:00 UTC" "2016-12-26 04:56:00 UTC"
## [3] "2016-12-26 08:23:00 UTC" "2016-12-26 08:57:00 UTC"
## [5] "2016-12-26 12:46:00 UTC"
## 
## Vector of Ends
## [1] "2016-12-24 03:43:00 UTC" "2016-12-25 23:40:00 UTC"
## [3] "2016-12-26 05:20:00 UTC" "2016-12-26 08:37:00 UTC"
## [5] "2016-12-26 12:23:00 UTC"
# Exclude the second problem end time - 2016-03-24 22:14:00 UTC
kordRain2016 <- runFullPrecipExtraction(kord2016METAR, 
                                        pType="RA", 
                                        titleText="Chicago, IL Rainfall (hours) in 2016", 
                                        yAxisText="Hours of Rain", 
                                        endExclude=c("2016-03-01 0414", "2016-03-24 2214"),
                                        beginExclude=c(), 
                                        maxProb=1440, 
                                        sState=FALSE, 
                                        makePlots=FALSE
                                        )
## 
## Regex search code is: (RA[B|E]\d+[0-9BE]*) 
## 
## Classes 'tbl_df', 'tbl' and 'data.frame':    8815 obs. of  8 variables:
##  $ origMETAR : chr  "KORD 310051Z 26008KT 8SM OVC016 M03/M06 A3021 RMK AO2 SLP239 T10281056" "KORD 310151Z 26008KT 9SM OVC016 M03/M06 A3021 RMK AO2 SLP240 T10281056" "KORD 310251Z 25008KT 10SM OVC015 M03/M06 A3021 RMK AO2 SLP239 T10281061 50004" "KORD 310351Z 24009KT 10SM OVC019 M03/M06 A3022 RMK AO2 SLP242 T10281061" ...
##  $ dtime     : POSIXct, format: "2015-12-31 00:51:00" "2015-12-31 01:51:00" ...
##  $ precipData: chr  NA NA NA NA ...
##  $ isPrecip  : logi  FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ nBegin    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ nEnd      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ dateUTC   : Date, format: "2015-12-31" "2015-12-31" ...
##  $ hourUTC   : int  0 1 2 3 4 5 6 7 8 9 ...
## 
## # A tibble: 7 x 4
##   isPrecip nBegin  nEnd     n
##   <lgl>     <dbl> <dbl> <int>
## 1 FALSE         0     0  8256
## 2 TRUE          0     1   179
## 3 TRUE          1     0   177
## 4 TRUE          1     1   168
## 5 TRUE          1     2    14
## 6 TRUE          2     1    14
## 7 TRUE          2     2     7
## 
## Are there any problems with duplicated keys? FALSE 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     2.0    15.0    39.0   239.5   119.0 10708.0 
## 
## Potential problem Detected - very long.  Data to help investigate
## 
## Positions with problems are: 202 204 206 207 211 213 214 215 224 227 236 240 244
## Vector of Begins
##  [1] "2016-07-01 03:25:00 UTC" "2016-07-06 07:42:00 UTC"
##  [3] "2016-07-07 22:34:00 UTC" "2016-07-13 13:51:00 UTC"
##  [5] "2016-07-13 21:47:00 UTC" "2016-07-13 23:47:00 UTC"
##  [7] "2016-07-15 19:16:00 UTC" "2016-07-15 21:06:00 UTC"
##  [9] "2016-07-17 13:44:00 UTC" "2016-07-18 04:11:00 UTC"
## [11] "2016-07-22 01:46:00 UTC"
## 
## Vector of Ends
##  [1] "2016-07-01 03:38:00 UTC" "2016-07-06 14:08:00 UTC"
##  [3] "2016-07-08 00:36:00 UTC" "2016-07-13 22:07:00 UTC"
##  [5] "2016-07-14 00:54:00 UTC" "2016-07-15 20:39:00 UTC"
##  [7] "2016-07-15 22:27:00 UTC" "2016-07-17 14:38:00 UTC"
##  [9] "2016-07-18 04:44:00 UTC" "2016-07-22 06:34:00 UTC"
## [11] "2016-07-24 01:00:00 UTC"
# Exclude the begin time that creates a 3-day continuous rainfall - 2016-07-13 2347
kordRain2016 <- runFullPrecipExtraction(kord2016METAR, 
                                        pType="RA", 
                                        titleText="Chicago, IL Rainfall (hours) in 2016", 
                                        yAxisText="Hours of Rain", 
                                        endExclude=c("2016-03-01 0414", "2016-03-24 2214"),
                                        beginExclude=c("2016-07-13 2347"), 
                                        maxProb=1440, 
                                        sState=FALSE, 
                                        makePlots=FALSE
                                        )
## 
## Regex search code is: (RA[B|E]\d+[0-9BE]*) 
## 
## Classes 'tbl_df', 'tbl' and 'data.frame':    8815 obs. of  8 variables:
##  $ origMETAR : chr  "KORD 310051Z 26008KT 8SM OVC016 M03/M06 A3021 RMK AO2 SLP239 T10281056" "KORD 310151Z 26008KT 9SM OVC016 M03/M06 A3021 RMK AO2 SLP240 T10281056" "KORD 310251Z 25008KT 10SM OVC015 M03/M06 A3021 RMK AO2 SLP239 T10281061 50004" "KORD 310351Z 24009KT 10SM OVC019 M03/M06 A3022 RMK AO2 SLP242 T10281061" ...
##  $ dtime     : POSIXct, format: "2015-12-31 00:51:00" "2015-12-31 01:51:00" ...
##  $ precipData: chr  NA NA NA NA ...
##  $ isPrecip  : logi  FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ nBegin    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ nEnd      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ dateUTC   : Date, format: "2015-12-31" "2015-12-31" ...
##  $ hourUTC   : int  0 1 2 3 4 5 6 7 8 9 ...
## 
## # A tibble: 7 x 4
##   isPrecip nBegin  nEnd     n
##   <lgl>     <dbl> <dbl> <int>
## 1 FALSE         0     0  8256
## 2 TRUE          0     1   179
## 3 TRUE          1     0   177
## 4 TRUE          1     1   168
## 5 TRUE          1     2    14
## 6 TRUE          2     1    14
## 7 TRUE          2     2     7
## 
## Are there any problems with duplicated keys? FALSE
## Warning in unclass(time1) - unclass(time2): longer object length is not a
## multiple of shorter object length
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## -30696.0    -44.0     13.0    915.4     54.0 509428.0 
## 
## Problem Detected - Intervals are not positive.  Data to help investigate
## 
## Vector of Begins
##  [1] "2016-08-25 09:29:00 UTC" "2016-08-27 04:07:00 UTC"
##  [3] "2016-08-27 06:24:00 UTC" "2016-08-27 10:40:00 UTC"
##  [5] "2016-08-27 14:06:00 UTC" "2016-08-29 20:59:00 UTC"
##  [7] "2016-08-29 23:15:00 UTC" "2016-08-30 10:23:00 UTC"
##  [9] "2016-09-01 20:08:00 UTC" "2016-09-06 19:54:00 UTC"
## [11] "2016-09-07 17:13:00 UTC"
## 
## Vector of Ends
##  [1] "2016-08-25 09:39:00 UTC" "2016-08-27 05:07:00 UTC"
##  [3] "2016-08-27 10:21:00 UTC" "2016-08-27 13:20:00 UTC"
##  [5] "2016-08-27 14:48:00 UTC" "2016-08-29 20:48:00 UTC"
##  [7] "2016-08-29 21:16:00 UTC" "2016-08-30 00:38:00 UTC"
##  [9] "2016-08-30 13:56:00 UTC" "2016-09-01 20:18:00 UTC"
## [11] "2016-09-06 20:11:00 UTC"
## 
## 
## Potential problem Detected - very long.  Data to help investigate
## 
## Positions with problems are: 401
## Vector of Begins
## [1] "2016-12-25 22:11:00 UTC" "2016-12-26 04:56:00 UTC"
## [3] "2016-12-26 08:23:00 UTC" "2016-12-26 08:57:00 UTC"
## [5] "2016-12-26 12:46:00 UTC"
## 
## Vector of Ends
## [1] "2016-12-24 03:43:00 UTC" "2016-12-25 23:40:00 UTC"
## [3] "2016-12-26 05:20:00 UTC" "2016-12-26 08:37:00 UTC"
## [5] "2016-12-26 12:23:00 UTC"
# Exclude the third problem end time - 2016-08-29 20:48:00 UTC
# As this is the final data integrity issue, show the plots
kordRain2016 <- runFullPrecipExtraction(kord2016METAR, 
                                        pType="RA", 
                                        titleText="Chicago, IL Rainfall (hours) in 2016", 
                                        yAxisText="Hours of Rain", 
                                        endExclude=c("2016-03-01 0414", "2016-03-24 2214", 
                                                     "2016-08-29 2048"
                                                     ),
                                        beginExclude=c("2016-07-13 2347"), 
                                        maxProb=1440, 
                                        sState=FALSE, 
                                        makePlots=TRUE
                                        )
## 
## Regex search code is: (RA[B|E]\d+[0-9BE]*) 
## 
## Classes 'tbl_df', 'tbl' and 'data.frame':    8815 obs. of  8 variables:
##  $ origMETAR : chr  "KORD 310051Z 26008KT 8SM OVC016 M03/M06 A3021 RMK AO2 SLP239 T10281056" "KORD 310151Z 26008KT 9SM OVC016 M03/M06 A3021 RMK AO2 SLP240 T10281056" "KORD 310251Z 25008KT 10SM OVC015 M03/M06 A3021 RMK AO2 SLP239 T10281061 50004" "KORD 310351Z 24009KT 10SM OVC019 M03/M06 A3022 RMK AO2 SLP242 T10281061" ...
##  $ dtime     : POSIXct, format: "2015-12-31 00:51:00" "2015-12-31 01:51:00" ...
##  $ precipData: chr  NA NA NA NA ...
##  $ isPrecip  : logi  FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ nBegin    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ nEnd      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ dateUTC   : Date, format: "2015-12-31" "2015-12-31" ...
##  $ hourUTC   : int  0 1 2 3 4 5 6 7 8 9 ...
## 
## # A tibble: 7 x 4
##   isPrecip nBegin  nEnd     n
##   <lgl>     <dbl> <dbl> <int>
## 1 FALSE         0     0  8256
## 2 TRUE          0     1   179
## 3 TRUE          1     0   177
## 4 TRUE          1     1   168
## 5 TRUE          1     2    14
## 6 TRUE          2     1    14
## 7 TRUE          2     2     7
## 
## Are there any problems with duplicated keys? FALSE 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    2.00   14.00   33.50   71.33   86.00 1243.00
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Show the key parameters used
kordRain2016$keyParams
## $fileName
## [1] "kord2016METAR"
## 
## $pType
## [1] "RA"
## 
## $endExclude
## [1] "2016-03-01 0414" "2016-03-24 2214" "2016-08-29 2048"
## 
## $beginExclude
## [1] "2016-07-13 2347"
## 
## $maxProb
## [1] 1440
## 
## $sState
## [1] FALSE

The full functions can then be explored for Chicago, IL 2016 snow plots:

# Run for Chicago, IL 2016 snowfall - run with no plots while debugging begin/end exclude
kordSnow2016 <- runFullPrecipExtraction(kord2016METAR, 
                                        pType="SN", 
                                        titleText="Chicago, IL Snowfall (hours) in 2016", 
                                        yAxisText="Hours of Snow", 
                                        endExclude=c(),
                                        beginExclude=c(), 
                                        maxProb=1440, 
                                        sState=FALSE, 
                                        makePlots=FALSE
                                        )
## 
## Regex search code is: (SN[B|E]\d+[0-9BE]*) 
## 
## Classes 'tbl_df', 'tbl' and 'data.frame':    8815 obs. of  8 variables:
##  $ origMETAR : chr  "KORD 310051Z 26008KT 8SM OVC016 M03/M06 A3021 RMK AO2 SLP239 T10281056" "KORD 310151Z 26008KT 9SM OVC016 M03/M06 A3021 RMK AO2 SLP240 T10281056" "KORD 310251Z 25008KT 10SM OVC015 M03/M06 A3021 RMK AO2 SLP239 T10281061 50004" "KORD 310351Z 24009KT 10SM OVC019 M03/M06 A3022 RMK AO2 SLP242 T10281061" ...
##  $ dtime     : POSIXct, format: "2015-12-31 00:51:00" "2015-12-31 01:51:00" ...
##  $ precipData: chr  NA NA NA NA ...
##  $ isPrecip  : logi  FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ nBegin    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ nEnd      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ dateUTC   : Date, format: "2015-12-31" "2015-12-31" ...
##  $ hourUTC   : int  0 1 2 3 4 5 6 7 8 9 ...
## 
## # A tibble: 6 x 4
##   isPrecip nBegin  nEnd     n
##   <lgl>     <dbl> <dbl> <int>
## 1 FALSE         0     0  8630
## 2 TRUE          0     1    69
## 3 TRUE          1     0    62
## 4 TRUE          1     1    44
## 5 TRUE          1     2     4
## 6 TRUE          2     1     6
## 
## Are there any problems with duplicated keys? FALSE
## Warning in unclass(time1) - unclass(time2): longer object length is not a
## multiple of shorter object length
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -348118   -2604     -97    7747      33  520190 
## 
## Problem Detected - Intervals are not positive.  Data to help investigate
## 
## Vector of Begins
##  [1] "2016-02-09 00:53:00 UTC" "2016-02-09 02:48:00 UTC"
##  [3] "2016-02-09 18:08:00 UTC" "2016-02-09 18:55:00 UTC"
##  [5] "2016-02-09 23:25:00 UTC" "2016-02-10 02:36:00 UTC"
##  [7] "2016-02-10 04:19:00 UTC" "2016-02-12 13:32:00 UTC"
##  [9] "2016-02-12 14:28:00 UTC" "2016-02-12 17:15:00 UTC"
## [11] "2016-02-14 17:06:00 UTC"
## 
## Vector of Ends
##  [1] "2016-02-09 01:46:00 UTC" "2016-02-09 17:19:00 UTC"
##  [3] "2016-02-09 18:41:00 UTC" "2016-02-09 22:30:00 UTC"
##  [5] "2016-02-09 23:33:00 UTC" "2016-02-10 02:16:00 UTC"
##  [7] "2016-02-10 03:38:00 UTC" "2016-02-10 04:31:00 UTC"
##  [9] "2016-02-12 13:45:00 UTC" "2016-02-12 16:00:00 UTC"
## [11] "2016-02-12 17:45:00 UTC"
## 
## 
## Potential problem Detected - very long.  Data to help investigate
## 
## Positions with problems are: 123 124 125 126 127
## Vector of Begins
## [1] "2016-12-23 20:18:00 UTC" "2016-12-23 23:01:00 UTC"
## [3] "2016-12-24 03:43:00 UTC" "2016-12-29 18:33:00 UTC"
## [5] "2016-12-30 02:46:00 UTC"
## 
## Vector of Ends
## [1] "2016-12-17 09:46:00 UTC" "2016-12-17 20:09:00 UTC"
## [3] "2016-12-18 11:07:00 UTC" "2016-12-23 17:50:00 UTC"
## [5] "2016-12-23 20:34:00 UTC"
# After investigation, there are five extraneous end times in the Chicago snowfall data - exclude them
# Create the plots
kordSnow2016 <- runFullPrecipExtraction(kord2016METAR, 
                                        pType="SN", 
                                        titleText="Chicago, IL Snowfall (hours) in 2016", 
                                        yAxisText="Hours of Snow", 
                                        endExclude=c("2016-02-10 0216", "2016-03-24 2331", 
                                                     "2016-04-08 2300", "2016-04-09 0446",
                                                     "2016-12-24 0309"
                                                     ),
                                        beginExclude=c(), 
                                        maxProb=1440, 
                                        sState=FALSE, 
                                        makePlots=TRUE
                                        )
## 
## Regex search code is: (SN[B|E]\d+[0-9BE]*) 
## 
## Classes 'tbl_df', 'tbl' and 'data.frame':    8815 obs. of  8 variables:
##  $ origMETAR : chr  "KORD 310051Z 26008KT 8SM OVC016 M03/M06 A3021 RMK AO2 SLP239 T10281056" "KORD 310151Z 26008KT 9SM OVC016 M03/M06 A3021 RMK AO2 SLP240 T10281056" "KORD 310251Z 25008KT 10SM OVC015 M03/M06 A3021 RMK AO2 SLP239 T10281061 50004" "KORD 310351Z 24009KT 10SM OVC019 M03/M06 A3022 RMK AO2 SLP242 T10281061" ...
##  $ dtime     : POSIXct, format: "2015-12-31 00:51:00" "2015-12-31 01:51:00" ...
##  $ precipData: chr  NA NA NA NA ...
##  $ isPrecip  : logi  FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ nBegin    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ nEnd      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ dateUTC   : Date, format: "2015-12-31" "2015-12-31" ...
##  $ hourUTC   : int  0 1 2 3 4 5 6 7 8 9 ...
## 
## # A tibble: 6 x 4
##   isPrecip nBegin  nEnd     n
##   <lgl>     <dbl> <dbl> <int>
## 1 FALSE         0     0  8630
## 2 TRUE          0     1    69
## 3 TRUE          1     0    62
## 4 TRUE          1     1    44
## 5 TRUE          1     2     4
## 6 TRUE          2     1     6
## 
## Are there any problems with duplicated keys? FALSE 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    3.00   20.25   47.00  136.29  152.25 1016.00
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Show the key parameters used
kordSnow2016$keyParams
## $fileName
## [1] "kord2016METAR"
## 
## $pType
## [1] "SN"
## 
## $endExclude
## [1] "2016-02-10 0216" "2016-03-24 2331" "2016-04-08 2300" "2016-04-09 0446"
## [5] "2016-12-24 0309"
## 
## $beginExclude
## NULL
## 
## $maxProb
## [1] 1440
## 
## $sState
## [1] FALSE

Example #30: Checking Intervals for Consistency

The intervals created above can be compared against the hourly METAR observations for consistency. In general, RAE53 would lead to no precipitation recorded at 53Z while RAB53 would lead to precipitation recorded at 53Z. So, the intervals created should be reduced by 1 for purposes of overlap analysis.

Example code includes:

library(lubridate)  # lubridate has the %within% function and creates and checks intervals
## 
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
## 
##     date
intervalConsistency <- function(lst, pType){

    # Extract the beginning and interval times
    begins <- lst[["testAllBegins"]]
    ends <- lst[["testAllEnds"]]
    durs <- lst[["testIntervals"]]

    # Create intervals from the raw list file
    precipInts <- interval(begins, begins + durs - 1)

    
    # Extract the METAR and date-time information
    metar <- lst[["testFileProc"]][["origMETAR"]]
    dtime <- lst[["testFileProc"]][["dtime"]]
    
    # Take each METAR observation and check two factors
    # Is the precipitation type recorded in that METAR?
    # Does that METAR fall in any of the intervals?
    precipMETAR <- grepl(paste0(pType, ".*RMK"), metar)
    intMETAR <- sapply(dtime, FUN=function(x) {x %within% precipInts %>% any()})

    # Check for the consistency of the observations and print the mismatches
    print(table(precipMETAR, intMETAR))

    mism <- which(precipMETAR != intMETAR)
    if (length(mism) == 0) {
        cat("\nFull matches between METAR observations and intervals\n")
    } else {
        for (x in mism) {
            cat("\nMismatch at time", strftime(dtime[x], format="%Y-%m-%d %H:%M"), "UTC\n")
            print(metar[max(1, x-2):min(length(metar), x+2)])
        }
    }
    
    list(precipInts=precipInts, mismatches=mism, mismatchTimes=dtime[mism])
}

# Mismatch due to TSRA without RAB on 2016-04-30
tmp <- intervalConsistency(klasRain2016, pType="RA")
##            intMETAR
## precipMETAR FALSE TRUE
##       FALSE  8710    0
##       TRUE      1  107
## 
## Mismatch at time 2016-04-30 18:56 UTC
## [1] "KLAS 302156Z 01012G18KT 10SM FEW045 BKN060 BKN110 18/07 A2973 RMK AO2 SLP055 T01830072 $"                                                                                           
## [2] "KLAS 302256Z 04009KT 10SM FEW050 BKN090 OVC110 18/07 A2972 RMK AO2 SLP054 SH DSNT SE-S T01780067 $"                                                                                 
## [3] "KLAS 302356Z 04026G31KT 6SM TSRA SCT035 BKN080CB OVC100 16/10 A2978 RMK AO2 PK WND 04031/2356 PRESRR SLP075 OCNL LTGICCG N TS N-NE MOV SW P0000 60000 T01610100 10189 20106 53013 $"
## [4] "KLAS 010056Z 17004KT 10SM FEW026 BKN055 BKN100 14/11 A2976 RMK AO2 PK WND 04033/2357 SLP070 TSE19 MOV SW RAE42 P0037 T01390106 $"                                                   
## [5] "KLAS 010156Z 27007KT 10SM FEW050 BKN095 BKN220 14/09 A2975 RMK AO2 SLP068 T01390094 $"
# Mismatches due to malformatted METAR with missing data on 2016-07-13
# Mismatch due to RA vs FZRA on 2016-02-29
# Mismatch due to -RASN without RAB on 2016-03-24
# Mismatch due to TSRA without RAB on 2016-08-29
tmp <- intervalConsistency(kordRain2016, pType="RA")
##            intMETAR
## precipMETAR FALSE TRUE
##       FALSE  8342    4
##       TRUE      3  466
## 
## Mismatch at time 2016-02-29 21:51 UTC
## [1] "KORD 010151Z 06012KT 10SM -DZ OVC070 00/M06 A3002 RMK AO2 SLP172 P0000 T00001061"                                                                  
## [2] "KORD 010251Z 05011KT 10SM -RA BKN050 BKN070 OVC090 M01/M05 A3004 RMK AO2 DZE48RAB48 SLP178 60000 P0000 T10061050 51017"                            
## [3] "KORD 010351Z 06014G21KT 10SM -FZRA FEW017 SCT045 OVC055 M02/M05 A3003 RMK AO2 RAE11FZRAB39SNB11E39 SLP175 P0000 I1000 T10171050"                   
## [4] "KORD 010451Z 06015KT 10SM -FZRA BKN014 OVC019 M02/M05 A3001 RMK AO2 SLP169 FZRAE14B36 P0003 I1000 T10221050"                                       
## [5] "KORD 010551Z 07013KT 10SM BKN014 OVC070 M02/M05 A3000 RMK AO2 FZRAE30PLB09E48 SLP166 P0000 60000 I1000 I6000 T10221050 10006 21022 401111022 56011"
## 
## Mismatch at time 2016-03-24 16:51 UTC
## [1] "KORD 241951Z 01019KT 10SM BKN010 OVC015 03/02 A2948 RMK AO2 SLP987 T00330017 $"                                         
## [2] "KORD 242051Z 34014G22KT 10SM OVC010 03/01 A2953 RMK AO2 PRESRR SLP004 T00280011 53009 $"                                
## [3] "KORD 242151Z 34014KT 1 3/4SM -RASN BR SCT010 OVC016 01/00 A2960 RMK AO2 SLP029 T00110000 $"                             
## [4] "KORD 242251Z 35011G19KT 5SM -SN BR SCT007 BKN019 OVC046 01/00 A2962 RMK AO2 RAE14 SLP037 T00110000 $"                   
## [5] "KORD 242351Z 34016G23KT 10SM FEW011 BKN018 OVC025 01/M02 A2968 RMK AO2 SLP055 SNE31 60012 T00061017 10039 20006 53025 $"
## 
## Mismatch at time 2016-07-13 13:51 UTC
## [1] "KORD 131251Z 00000KT 10SM FEW055 FEW075 BKN250 25/21 A2990 RMK AO2 SLP118 T02500211"               
## [2] "KORD 131351Z 18009KT 9SM -RA FEW070 BKN090 BKN250 26/22 A2991 RMK AO2 RAB51 SLP124 P0000 T02560217"
## [3] "KORD 131851Z 22009G18KT 10SM FEW027 SCT047 BKN250 28/22 A2984 RMK AO2 SLP099 T02780217"            
## [4] "KORD 131951Z 23008KT 10SM BKN030 OVC070 29/22 A2981 RMK AO2 SLP089 T02890222"                      
## [5] "KORD 132051Z 21009KT 10SM SCT025 BKN033 BKN040 28/22 A2980 RMK AO2 SLP087 T02830222 56022"         
## 
## Mismatch at time 2016-07-13 14:51 UTC
## [1] "KORD 131351Z 18009KT 9SM -RA FEW070 BKN090 BKN250 26/22 A2991 RMK AO2 RAB51 SLP124 P0000 T02560217"        
## [2] "KORD 131851Z 22009G18KT 10SM FEW027 SCT047 BKN250 28/22 A2984 RMK AO2 SLP099 T02780217"                    
## [3] "KORD 131951Z 23008KT 10SM BKN030 OVC070 29/22 A2981 RMK AO2 SLP089 T02890222"                              
## [4] "KORD 132051Z 21009KT 10SM SCT025 BKN033 BKN040 28/22 A2980 RMK AO2 SLP087 T02830222 56022"                 
## [5] "KORD 132151Z 20010KT 10SM -RA FEW060 SCT080 BKN210 OVC250 28/23 A2979 RMK AO2 RAB47 SLP081 P0000 T02830233"
## 
## Mismatch at time 2016-07-13 15:51 UTC
## [1] "KORD 131851Z 22009G18KT 10SM FEW027 SCT047 BKN250 28/22 A2984 RMK AO2 SLP099 T02780217"                    
## [2] "KORD 131951Z 23008KT 10SM BKN030 OVC070 29/22 A2981 RMK AO2 SLP089 T02890222"                              
## [3] "KORD 132051Z 21009KT 10SM SCT025 BKN033 BKN040 28/22 A2980 RMK AO2 SLP087 T02830222 56022"                 
## [4] "KORD 132151Z 20010KT 10SM -RA FEW060 SCT080 BKN210 OVC250 28/23 A2979 RMK AO2 RAB47 SLP081 P0000 T02830233"
## [5] "KORD 132251Z 22008G17KT 10SM FEW060 FEW085 SCT210 OVC250 28/22 A2977 RMK AO2 RAE07 SLP077 P0000 T02780222" 
## 
## Mismatch at time 2016-07-13 17:51 UTC
## [1] "KORD 132051Z 21009KT 10SM SCT025 BKN033 BKN040 28/22 A2980 RMK AO2 SLP087 T02830222 56022"                                                                                 
## [2] "KORD 132151Z 20010KT 10SM -RA FEW060 SCT080 BKN210 OVC250 28/23 A2979 RMK AO2 RAB47 SLP081 P0000 T02830233"                                                                
## [3] "KORD 132251Z 22008G17KT 10SM FEW060 FEW085 SCT210 OVC250 28/22 A2977 RMK AO2 RAE07 SLP077 P0000 T02780222"                                                                 
## [4] "KORD 132351Z 26010KT 10SM -TSRA FEW055 SCT085CB BKN160 OVC180 27/22 A2978 RMK AO2 RAB47 TSB35 SLP077 OCNL LTGIC VC W TS VC W MOV E P0000 60000 T02720222 10289 20272 55009"
## [5] "KORD 140051Z 16017G26KT 10SM -TSRA FEW043 SCT090CB BKN110 BKN160 22/21 A2972 RMK AO2 PK WND 13030/0035 SLP059 FRQ LTGIC VC E-SE TS VC E-SE MOV E P0004 T02170206"          
## 
## Mismatch at time 2016-08-29 14:51 UTC
## [1] "KORD 291751Z 09006KT 10SM SCT026 SCT048 BKN170 BKN250 29/21 A3023 RMK AO2 SLP233 T02940211 10294 20222 58003"                                                 
## [2] "KORD 291851Z 06006KT 10SM FEW025 BKN048 BKN170 BKN250 30/21 A3021 RMK AO2 SLP226 OCNL LTGIC DSNT SW CB DSNT SW T03000211 $"                                   
## [3] "KORD 291951Z 25009KT 1SM R10L/1200VP6000FT +TSRA BKN041CB BKN055 OVC110 25/21 A3022 RMK AO2 SLP230 TSB11 CONS LTGICCG OHD TS OHD MOV E P0001 T02500211 $"     
## [4] "KORD 292051Z 01005KT 9SM TS SCT055CB BKN110 OVC140 24/22 A3019 RMK AO2 RAE48 SLP217 OCNL LTGICCG VC NW-W-NE TS VC NW-N-NE MOV E P0039 60040 T02390222 58010 $"
## [5] "KORD 292151Z 02005KT 10SM FEW050 FEW075 SCT140 BKN180 OVC250 25/22 A3018 RMK AO2 RAB2059E16 TSE06 SLP215 P0000 T02500222 $"
# Mismatches due to SN without SNB on 2016-02-09, 2016-03-24, 2016-04-08, 2016-12-23
tmp <- intervalConsistency(kordSnow2016, pType="SN")
##            intMETAR
## precipMETAR FALSE TRUE
##       FALSE  8532    0
##       TRUE      7  276
## 
## Mismatch at time 2016-02-09 18:51 UTC
## [1] "KORD 092251Z 32012KT 10SM SCT032 OVC060 M08/M14 A2990 RMK AO2 SLP136 SNE30 P0000 T10831139"                            
## [2] "KORD 092351Z 31014KT 10SM SCT028 BKN036 OVC070 M09/M14 A2992 RMK AO2 SNB25E33 SLP144 60000 T10891144 11078 21089 53023"
## [3] "KORD 100051Z 31012KT 4SM -SN FEW024 OVC040 M09/M13 A2993 RMK AO2 SLP148 P0003 T10941133"                               
## [4] "KORD 100151Z 29011KT 5SM -SN OVC040 M09/M13 A2994 RMK AO2 SLP150 P0002 T10941133"                                      
## [5] "KORD 100251Z 29011KT 3SM -SN SCT021 OVC031 M09/M12 A2994 RMK AO2 SNE16B36 SLP150 60000 T10941122 51006"                
## 
## Mismatch at time 2016-02-09 19:51 UTC
## [1] "KORD 092351Z 31014KT 10SM SCT028 BKN036 OVC070 M09/M14 A2992 RMK AO2 SNB25E33 SLP144 60000 T10891144 11078 21089 53023"
## [2] "KORD 100051Z 31012KT 4SM -SN FEW024 OVC040 M09/M13 A2993 RMK AO2 SLP148 P0003 T10941133"                               
## [3] "KORD 100151Z 29011KT 5SM -SN OVC040 M09/M13 A2994 RMK AO2 SLP150 P0002 T10941133"                                      
## [4] "KORD 100251Z 29011KT 3SM -SN SCT021 OVC031 M09/M12 A2994 RMK AO2 SNE16B36 SLP150 60000 T10941122 51006"                
## [5] "KORD 100351Z 31012KT 10SM BKN042 OVC085 M10/M14 A2994 RMK AO2 SNE38 SLP149 P0002 T11001139"                            
## 
## Mismatch at time 2016-03-24 16:51 UTC
## [1] "KORD 241951Z 01019KT 10SM BKN010 OVC015 03/02 A2948 RMK AO2 SLP987 T00330017 $"                                         
## [2] "KORD 242051Z 34014G22KT 10SM OVC010 03/01 A2953 RMK AO2 PRESRR SLP004 T00280011 53009 $"                                
## [3] "KORD 242151Z 34014KT 1 3/4SM -RASN BR SCT010 OVC016 01/00 A2960 RMK AO2 SLP029 T00110000 $"                             
## [4] "KORD 242251Z 35011G19KT 5SM -SN BR SCT007 BKN019 OVC046 01/00 A2962 RMK AO2 RAE14 SLP037 T00110000 $"                   
## [5] "KORD 242351Z 34016G23KT 10SM FEW011 BKN018 OVC025 01/M02 A2968 RMK AO2 SLP055 SNE31 60012 T00061017 10039 20006 53025 $"
## 
## Mismatch at time 2016-03-24 17:51 UTC
## [1] "KORD 242051Z 34014G22KT 10SM OVC010 03/01 A2953 RMK AO2 PRESRR SLP004 T00280011 53009 $"                                
## [2] "KORD 242151Z 34014KT 1 3/4SM -RASN BR SCT010 OVC016 01/00 A2960 RMK AO2 SLP029 T00110000 $"                             
## [3] "KORD 242251Z 35011G19KT 5SM -SN BR SCT007 BKN019 OVC046 01/00 A2962 RMK AO2 RAE14 SLP037 T00110000 $"                   
## [4] "KORD 242351Z 34016G23KT 10SM FEW011 BKN018 OVC025 01/M02 A2968 RMK AO2 SLP055 SNE31 60012 T00061017 10039 20006 53025 $"
## [5] "KORD 250051Z 35016G21KT 10SM SCT017 OVC025 00/M02 A2971 RMK AO2 PK WND 36029/0031 SLP066 T00001022 $"                   
## 
## Mismatch at time 2016-04-08 17:51 UTC
## [1] "KORD 082051Z 29018G24KT 10SM FEW046 SCT055 OVC090 04/M03 A2981 RMK AO2 PK WND 30035/2025 SNE03B28E40 SLP098 P0000 60000 T00441033 50002"
## [2] "KORD 082151Z 32016G25KT 9SM BKN034 BKN046 OVC055 03/M03 A2982 RMK AO2 PK WND 32030/2057 SNB29E46 SLP103 P0000 T00281033"                
## [3] "KORD 082251Z 31014G31KT 9SM -SN SCT045 BKN050 OVC080 02/M06 A2983 RMK AO2 PK WND 32031/2243 SLP108 P0000 T00221061"                     
## [4] "KORD 082351Z 31014G25KT 10SM SCT060 SCT080 03/M09 A2984 RMK AO2 SNE00 SLP112 P0000 60000 T00281089 10050 20017 52012"                   
## [5] "KORD 090051Z 35012G21KT 10SM FEW060 SCT080 01/M07 A2987 RMK AO2 PK WND 32030/2355 SLP121 T00111072"                                     
## 
## Mismatch at time 2016-12-23 19:51 UTC
## [1] "KORD 232351Z 14008KT 6SM BR SCT037 OVC055 01/M01 A2996 RMK AO2 RAE01B27E32SNB01E48 SLP152 4/003 P0003 60006 T00111006 10017 20011 56024"
## [2] "KORD 240051Z 20007KT 6SM BR FEW013 OVC055 01/M01 A2997 RMK AO2 SLP153 T00111006"                                                        
## [3] "KORD 240151Z 17011KT 4SM -SN BR SCT015 OVC039 01/M01 A2993 RMK AO2 SLP143 P0000 T00111006"                                              
## [4] "KORD 240251Z 20007KT 2SM -SN BR OVC008 01/00 A2994 RMK AO2 SLP143 P0000 60000 T00110000 58008"                                          
## [5] "KORD 240351Z 21008KT 2 1/2SM -SN BR OVC006 01/00 A2992 RMK AO2 RAB09E43SNE09B43 SLP138 P0004 T00060000"                                 
## 
## Mismatch at time 2016-12-23 20:51 UTC
## [1] "KORD 240051Z 20007KT 6SM BR FEW013 OVC055 01/M01 A2997 RMK AO2 SLP153 T00111006"                       
## [2] "KORD 240151Z 17011KT 4SM -SN BR SCT015 OVC039 01/M01 A2993 RMK AO2 SLP143 P0000 T00111006"             
## [3] "KORD 240251Z 20007KT 2SM -SN BR OVC008 01/00 A2994 RMK AO2 SLP143 P0000 60000 T00110000 58008"         
## [4] "KORD 240351Z 21008KT 2 1/2SM -SN BR OVC006 01/00 A2992 RMK AO2 RAB09E43SNE09B43 SLP138 P0004 T00060000"
## [5] "KORD 240451Z 23007KT 2SM -DZ BR OVC006 01/00 A2992 RMK AO2 DZB20SNE10 SLP139 P0000 T00110000"

Example #31: All Precipitation Types

The METAR data can be explored to find all the precipitation types it contains. Broadly, a precipitation type should meet five criteria:

  • Occurs prior to RMK (the remarks section has letter-only data that is not precipitation)
  • Preceded by a space
  • Optionally, begins with + (heavy) or - (light)
  • Contains only capital letters (no numbers) after the optional +/-
  • Trailed by a space

This will extract some non-precipitation types such as CLR and AUTO, so some iteration is expected.

Example code includes:

# Function to find precipitation types
findPrecipTypes <- function(lst, precipRegex="(?<= )[+-]?[A-Z]+(?= )", priorTo="RMK", exclTypes=NULL) {

    metar <- lst[["testFileProc"]][["origMETAR"]]
    
    # Keep only everything prior to priorTo
    if (!is.null(priorTo)) {
        rmkLoc <- str_locate(metar, priorTo)
        rmkLoc[is.na(rmkLoc)] <- -1  # keep everything if NA
        metar <- str_sub(metar, start=1, end=rmkLoc[, 1]-1)
    }
    
    # Exclude any items from the METAR
    if (!is.null(exclTypes)) {
        for (excl in exclTypes) {
            metar <- str_replace(metar, excl, "")
        }
    }
    
    # Is there a possible precipitation type in the file?
    pExists <- grepl(precipRegex, metar, perl=TRUE)
    
    cat("\nPrecipitation data status by METAR record\n\n")
    print(table(pExists))
    cat("\n")
    
    # Find the precipitation matches
    precipMatches <- str_match_all(metar, pattern=precipRegex)
    
    # Confirm that there are a maximum of two precipitation types per METAR
    listLengths <- sapply(precipMatches, FUN=length)
    if (max(listLengths) > 2) { 
        stop("Hard-coded for at most 2 precipitation matches, please investigate")
    } else if (max(listLengths)==0) {
        cat("\nNo precipitation detected in this file\n")
    }
    
    if (max(listLengths)==2) {
        cat("\nMultiple Precipitation types in the same record include\n")
        sapply(precipMatches[listLengths==2], FUN=paste, collapse=" ") %>% 
            table() %>% 
            sort(decreasing=TRUE) %>%
            print()
    }
    
    if (sum(listLengths==1) > 0) {
        cat("\nSingle Precipitation types in the same record include\n")
        sapply(precipMatches[listLengths==1], FUN=c) %>% 
            table() %>% 
            sort(decreasing=TRUE) %>%
            print()
    }    

    # Extract the second column and summarize precipitation types
    # table(precipMatches[, 2]) %>% sort(decreasing=TRUE) %>% print()
    
    precipMatches
}

# Original pass for Chicago, IL
x1 <- findPrecipTypes(kordRain2016)
## 
## Precipitation data status by METAR record
## 
## pExists
## FALSE  TRUE 
##  6792  2023 
## 
## 
## Multiple Precipitation types in the same record include
## .
##     -RA BR     -SN BR     -DZ BR      RA BR    TSRA BR   +TSRA BR     +RA BR 
##        151        108         47         34         11         10          7 
##      SN FG     BR CLR    SN FZFG -FZDZSN BR   -TSRA BR     -DZ FG   -FZDZ BR 
##          7          6          6          4          3          2          2 
##   -RASN BR     -PL BR   -RAPL BR   -SNPL BR     +SN FG   +SN FZFG    BR BCFG 
##          2          1          1          1          1          1          1 
##      DZ BR     HZ CLR    MIFG BR      TS BR   VCTS -RA    VCTS BR 
##          1          1          1          1          1          1 
## 
## Single Precipitation types in the same record include
## .
##     CLR      BR     -RA     -SN   -TSRA      FG      HZ     -DZ      TS   +TSRA 
##     805     327     210     150      26      25      24      13       9       3 
##      RA    TSRA    VCTS   -FZRA    AUTO    FZFG -FZRAPL   -RASN   -SNPL      SN 
##       3       3       3       2       2       2       1       1       1       1
# Exclude AUTO and CLR as they are a record state and cloud type, not precipitation
x2 <- findPrecipTypes(kordRain2016, exclTypes=c("CLR", "AUTO"))
## 
## Precipitation data status by METAR record
## 
## pExists
## FALSE  TRUE 
##  7599  1216 
## 
## 
## Multiple Precipitation types in the same record include
## .
##     -RA BR     -SN BR     -DZ BR      RA BR    TSRA BR   +TSRA BR     +RA BR 
##        151        108         47         34         11         10          7 
##      SN FG    SN FZFG -FZDZSN BR   -TSRA BR     -DZ FG   -FZDZ BR   -RASN BR 
##          7          6          4          3          2          2          2 
##     -PL BR   -RAPL BR   -SNPL BR     +SN FG   +SN FZFG    BR BCFG      DZ BR 
##          1          1          1          1          1          1          1 
##    MIFG BR      TS BR   VCTS -RA    VCTS BR 
##          1          1          1          1 
## 
## Single Precipitation types in the same record include
## .
##      BR     -RA     -SN   -TSRA      FG      HZ     -DZ      TS   +TSRA      RA 
##     333     210     150      26      25      25      13       9       3       3 
##    TSRA    VCTS   -FZRA    FZFG -FZRAPL   -RASN   -SNPL      SN 
##       3       3       2       2       1       1       1       1
# Original pass for Las Vegas, NV
x3 <- findPrecipTypes(klasRain2016)
## 
## Precipitation data status by METAR record
## 
## pExists
## FALSE  TRUE 
##  6205  2613 
## 
## 
## Multiple Precipitation types in the same record include
## .
##   AUTO CLR VCBLDU CLR     -RA BR      RA BR   -TSRA BR   -RA VCFG     +RA BR 
##          7          7          5          3          2          1          1 
##      FU HZ    HZ BLDU    VCTS RA 
##          1          1          1 
## 
## Single Precipitation types in the same record include
## .
##    CLR    -RA VCBLDU  -TSRA     TS     HZ   VCFG     BR   AUTO     RA   VCTS 
##   2393     72     54     17     13      8      8      4      3      3      3 
##     DU   TSRA  +TSRA   BLDU 
##      2      2      1      1
# Exclude AUTO and CLR as they are a record state and cloud type, not precipitation
x4 <- findPrecipTypes(klasRain2016, exclTypes=c("CLR", "AUTO"))
## 
## Precipitation data status by METAR record
## 
## pExists
## FALSE  TRUE 
##  8608   210 
## 
## 
## Multiple Precipitation types in the same record include
## .
##   -RA BR    RA BR -TSRA BR -RA VCFG   +RA BR    FU HZ  HZ BLDU  VCTS RA 
##        5        3        2        1        1        1        1        1 
## 
## Single Precipitation types in the same record include
## .
##    -RA VCBLDU  -TSRA     TS     HZ   VCFG     BR     RA   VCTS     DU   TSRA 
##     72     61     17     13      8      8      4      3      3      2      2 
##  +TSRA   BLDU 
##      1      1